This is an automated email from the ASF dual-hosted git repository. yuqi1129 pushed a commit to branch feat/mcp-governance-task3-6 in repository https://gitbox.apache.org/repos/asf/gravitino.git
commit f6c011f08970d7a9ea1c23cde3dad7a5534d7d04 Author: yuqi <[email protected]> AuthorDate: Wed Jun 10 23:52:26 2026 +0800 [#11575] test(mcp-server): live-Gravitino authorization integration test + CI End-to-end test proving per-user authorization flows through the MCP HTTP server against a real Gravitino with authorization enabled. Verifies the three demo acceptance moments: 1. admin and bob run the same list-catalogs call and get different, authorization-scoped results (admin sees both catalogs, bob sees only the one granted to it). 2. bob (no write grant) is denied by Gravitino when creating a tag through MCP (ForbiddenException propagated as a tool error). 3. the reads and the denied write appear as audit records attributed to the correct principal (admin/bob) with allow/deny outcomes. - tests/integration/gravitino_setup.py: provisions metalake, two model catalogs, user bob, a reader role (USE_CATALOG on one catalog), and the grant — all via REST as the simple-auth service admin. - tests/integration/conftest.py: session fixtures; skips the suite unless GRAVITINO_URI / MCP_URL / MCP_METALAKE are exported. - tests/integration/test_authz_e2e.py: the three demo-moment assertions, driving the MCP server over real Streamable HTTP transport with per-principal Basic auth headers. - dev/run_authz_integration_test.sh: orchestration — configures simple auth + authorization, starts Gravitino via gravitino.sh, starts the MCP server in HTTP mode, runs pytest, and tears everything down (restoring the config). - .github/workflows/mcp-integration-test.yml: CI job that builds the distribution and runs the orchestration script. Verified locally: 3 passed; audit log shows admin=allow and bob=deny. 
--- .github/workflows/mcp-integration-test.yml | 82 +++++++++++ mcp-server/dev/run_authz_integration_test.sh | 179 ++++++++++++++++++++++++ mcp-server/tests/integration/__init__.py | 0 mcp-server/tests/integration/conftest.py | 69 +++++++++ mcp-server/tests/integration/gravitino_setup.py | 140 ++++++++++++++++++ mcp-server/tests/integration/test_authz_e2e.py | 160 +++++++++++++++++++++ 6 files changed, 630 insertions(+) diff --git a/.github/workflows/mcp-integration-test.yml b/.github/workflows/mcp-integration-test.yml new file mode 100644 index 0000000000..959d439e82 --- /dev/null +++ b/.github/workflows/mcp-integration-test.yml @@ -0,0 +1,82 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +name: mcp-integration-test + +on: + push: + branches: [ "main", "branch-*" ] + pull_request: + branches: [ "main", "branch-*" ] + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + changes: + runs-on: ubuntu-latest + outputs: + mcp_or_authz_changes: ${{ steps.filter.outputs.mcp_or_authz_changes }} + steps: + - uses: actions/checkout@v4 + - uses: dorny/paths-filter@de90cc6fb38fc0963ad72b210f1f284cd68cea36 + id: filter + with: + filters: | + mcp_or_authz_changes: + - 'mcp-server/**' + - 'server/**' + - 'core/src/main/java/org/apache/gravitino/authorization/**' + - 'server-common/src/main/java/org/apache/gravitino/server/authentication/**' + - '.github/workflows/mcp-integration-test.yml' + + mcp-authz-integration-test: + runs-on: ubuntu-latest + timeout-minutes: 60 + needs: changes + if: needs.changes.outputs.mcp_or_authz_changes == 'true' + steps: + - uses: actions/checkout@v4 + + - uses: ./.github/actions/setup-java-toolchains + with: + java-version: 17 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + + - name: Build Gravitino distribution + run: ./gradlew compileDistribution -x test -PskipWeb=true + + - name: Run MCP authorization integration test + env: + GRAVITINO_HOME: ${{ github.workspace }}/distribution/package + run: | + ./mcp-server/dev/run_authz_integration_test.sh + + - name: Upload server logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: mcp-authz-it-logs + path: | + distribution/package/logs/** + mcp-server/gravitino-mcp-audit.log + mcp-server/gravitino-mcp.log + if-no-files-found: ignore diff --git a/mcp-server/dev/run_authz_integration_test.sh b/mcp-server/dev/run_authz_integration_test.sh new file mode 100755 index 0000000000..82673806b7 --- /dev/null +++ b/mcp-server/dev/run_authz_integration_test.sh @@ -0,0 +1,179 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor 
license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# End-to-end authorization integration test for the Gravitino MCP server. +# +# This script: +# 1. Builds the Gravitino distribution (unless GRAVITINO_HOME is provided). +# 2. Enables simple authentication + authorization (serviceAdmins=admin). +# 3. Starts the Gravitino server. +# 4. Starts the MCP server in HTTP transport mode. +# 5. Runs the pytest integration suite (which provisions metadata as admin and +# verifies per-user authorization through MCP). +# 6. Tears everything down and restores the original config. +# +# Usage: +# ./dev/run_authz_integration_test.sh +# GRAVITINO_HOME=/path/to/distribution ./dev/run_authz_integration_test.sh + +set -euo pipefail + +# All services run on localhost; never route them through an HTTP proxy. +# httpx (MCP server -> Gravitino, test client) and curl both honour NO_PROXY. +export NO_PROXY="localhost,127.0.0.1" +export no_proxy="localhost,127.0.0.1" + +MCP_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +REPO_ROOT="$(cd "${MCP_DIR}/.." && pwd)" + +# Use the loopback literal (not "localhost") so the MCP server binds to a +# guaranteed-assignable address; on some hosts "localhost" resolves to a LAN IP. 
+GRAVITINO_PORT="${GRAVITINO_PORT:-8090}" +GRAVITINO_URI="http://127.0.0.1:${GRAVITINO_PORT}" +MCP_PORT="${MCP_PORT:-8000}" +MCP_URL="http://127.0.0.1:${MCP_PORT}/mcp" +MCP_METALAKE="${MCP_METALAKE:-mcp_authz_it}" +MCP_AUDIT_LOG="${MCP_DIR}/gravitino-mcp-audit.log" + +MCP_PID="" +GRAVITINO_STARTED="false" +CONF_BACKUP="" + +log() { echo "[authz-it] $*"; } + +cleanup() { + log "Tearing down..." + if [[ -n "${MCP_PID}" ]] && kill -0 "${MCP_PID}" 2>/dev/null; then + kill "${MCP_PID}" 2>/dev/null || true + wait "${MCP_PID}" 2>/dev/null || true + fi + if [[ "${GRAVITINO_STARTED}" == "true" ]]; then + "${GRAVITINO_HOME}/bin/gravitino.sh" stop || true + fi + if [[ -n "${CONF_BACKUP}" && -f "${CONF_BACKUP}" ]]; then + mv "${CONF_BACKUP}" "${GRAVITINO_HOME}/conf/gravitino.conf" + log "Restored original gravitino.conf" + fi +} +trap cleanup EXIT + +# --------------------------------------------------------------------------- +# 1. Resolve / build the Gravitino distribution +# --------------------------------------------------------------------------- +if [[ -z "${GRAVITINO_HOME:-}" ]]; then + log "GRAVITINO_HOME not set; building distribution..." + (cd "${REPO_ROOT}" && ./gradlew compileDistribution -x test) + GRAVITINO_HOME="${REPO_ROOT}/distribution/package" +fi +log "Using GRAVITINO_HOME=${GRAVITINO_HOME}" + +if [[ ! -x "${GRAVITINO_HOME}/bin/gravitino.sh" ]]; then + log "ERROR: ${GRAVITINO_HOME}/bin/gravitino.sh not found" + exit 1 +fi + +# --------------------------------------------------------------------------- +# 2. Enable simple auth + authorization +# --------------------------------------------------------------------------- +CONF="${GRAVITINO_HOME}/conf/gravitino.conf" +CONF_BACKUP="${CONF}.authz-it.bak" +cp "${CONF}" "${CONF_BACKUP}" + +# Remove any pre-existing values for the keys we manage, then append ours. 
+sed -i.tmp \ + -e '/^gravitino.authenticators/d' \ + -e '/^gravitino.authorization.enable/d' \ + -e '/^gravitino.authorization.serviceAdmins/d' \ + "${CONF}" +rm -f "${CONF}.tmp" +cat >> "${CONF}" <<EOF + +# --- injected by run_authz_integration_test.sh --- +gravitino.authenticators = simple +gravitino.authorization.enable = true +gravitino.authorization.serviceAdmins = admin +EOF +log "Configured simple auth + authorization (serviceAdmins=admin)" + +# --------------------------------------------------------------------------- +# 3. Start Gravitino +# --------------------------------------------------------------------------- +log "Starting Gravitino server..." +"${GRAVITINO_HOME}/bin/gravitino.sh" start +GRAVITINO_STARTED="true" + +# With simple auth enabled every request needs an Authorization header. +ADMIN_AUTH="Basic $(printf '%s' 'admin:dummy' | base64)" + +log "Waiting for Gravitino to become healthy..." +for i in $(seq 1 60); do + if curl -sf --noproxy '*' -H "Authorization: ${ADMIN_AUTH}" \ + "${GRAVITINO_URI}/api/version" >/dev/null 2>&1; then + log "Gravitino is up." + break + fi + if [[ "${i}" == "60" ]]; then + log "ERROR: Gravitino did not become healthy in time" + exit 1 + fi + sleep 2 +done + +# --------------------------------------------------------------------------- +# 4. Start the MCP server in HTTP transport mode +# --------------------------------------------------------------------------- +log "Starting MCP server (HTTP) on ${MCP_URL}..." +rm -f "${MCP_AUDIT_LOG}" +( + cd "${MCP_DIR}" + uv run python -m mcp_server \ + --metalake "${MCP_METALAKE}" \ + --gravitino-uri "${GRAVITINO_URI}" \ + --transport http \ + --mcp-url "${MCP_URL}" +) & +MCP_PID=$! + +log "Waiting for MCP server to become reachable..." +for i in $(seq 1 30); do + if nc -z localhost "${MCP_PORT}" 2>/dev/null; then + log "MCP server is up." 
+ break + fi + if [[ "${i}" == "30" ]]; then + log "ERROR: MCP server did not start in time" + exit 1 + fi + sleep 1 +done + +# --------------------------------------------------------------------------- +# 5. Run the pytest integration suite +# --------------------------------------------------------------------------- +log "Running integration tests..." +( + cd "${MCP_DIR}" + GRAVITINO_URI="${GRAVITINO_URI}" \ + MCP_URL="${MCP_URL}" \ + MCP_METALAKE="${MCP_METALAKE}" \ + MCP_AUDIT_LOG="${MCP_AUDIT_LOG}" \ + uv run pytest tests/integration -v +) + +log "Integration tests passed." diff --git a/mcp-server/tests/integration/__init__.py b/mcp-server/tests/integration/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mcp-server/tests/integration/conftest.py b/mcp-server/tests/integration/conftest.py new file mode 100644 index 0000000000..313fff509e --- /dev/null +++ b/mcp-server/tests/integration/conftest.py @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""pytest fixtures for the MCP authorization integration test. + +The orchestration script (``dev/run_authz_integration_test.sh``) starts a real +Gravitino server and the MCP server in HTTP mode, then exports the connection +details below. 
When these env vars are absent the whole integration suite is +skipped so a plain ``pytest`` run stays green without external services. + +Required environment variables: + GRAVITINO_URI e.g. http://localhost:8090 + MCP_URL e.g. http://localhost:8000/mcp + MCP_METALAKE metalake name the MCP server was launched against +""" + +import os + +import pytest + +from tests.integration.gravitino_setup import GravitinoFixture + +_REQUIRED_ENV = ("GRAVITINO_URI", "MCP_URL", "MCP_METALAKE") + + +def _missing_env() -> list: + return [name for name in _REQUIRED_ENV if not os.environ.get(name)] + + +@pytest.fixture(scope="session") +def integration_env() -> dict: + missing = _missing_env() + if missing: + pytest.skip( + "Integration test requires a running Gravitino + MCP server. " + f"Missing env: {', '.join(missing)}. " + "Run via dev/run_authz_integration_test.sh." + ) + return { + "gravitino_uri": os.environ["GRAVITINO_URI"], + "mcp_url": os.environ["MCP_URL"], + "metalake": os.environ["MCP_METALAKE"], + } + + +@pytest.fixture(scope="session") +def gravitino_fixture(integration_env: dict) -> GravitinoFixture: + """Provision metalake/catalogs/user/role/grant once for the whole suite.""" + fixture = GravitinoFixture( + gravitino_uri=integration_env["gravitino_uri"], + metalake=integration_env["metalake"], + ) + fixture.provision() + yield fixture + fixture.close() diff --git a/mcp-server/tests/integration/gravitino_setup.py b/mcp-server/tests/integration/gravitino_setup.py new file mode 100644 index 0000000000..688dacfae2 --- /dev/null +++ b/mcp-server/tests/integration/gravitino_setup.py @@ -0,0 +1,140 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Provision Gravitino metadata and authorization fixtures for the integration test. + +All requests are issued as the service admin using Gravitino simple +authentication (``Authorization: Basic base64(user:dummy)``). The fixture +creates a metalake with two model catalogs, a non-admin user ``bob``, and a role +that grants ``bob`` access to only one of the two catalogs. This produces a +visibly different authorization slice between the admin and ``bob`` principals. 
+""" + +import base64 + +import httpx + + +def basic_auth_header(user: str) -> str: + """Build a Gravitino simple-auth header value for ``user``.""" + credential = base64.b64encode(f"{user}:dummy".encode("utf-8")).decode( + "ascii" + ) + return f"Basic {credential}" + + +class GravitinoFixture: + """Sets up metalake/catalogs/user/role/grant via the Gravitino REST API.""" + + def __init__( + self, + gravitino_uri: str, + metalake: str, + admin_user: str = "admin", + granted_user: str = "bob", + catalog_allowed: str = "cat_allowed", + catalog_denied: str = "cat_denied", + role_name: str = "reader_role", + ): + self.gravitino_uri = gravitino_uri.rstrip("/") + self.metalake = metalake + self.admin_user = admin_user + self.granted_user = granted_user + self.catalog_allowed = catalog_allowed + self.catalog_denied = catalog_denied + self.role_name = role_name + self._client = httpx.Client( + base_url=self.gravitino_uri, + headers={"Authorization": basic_auth_header(admin_user)}, + timeout=30.0, + ) + + def _post(self, path: str, body: dict) -> httpx.Response: + response = self._client.post(path, json=body) + response.raise_for_status() + return response + + def _put(self, path: str, body: dict) -> httpx.Response: + response = self._client.put(path, json=body) + response.raise_for_status() + return response + + def provision(self) -> None: + """Create all metadata and authorization fixtures (idempotent-ish).""" + self._create_metalake() + self._create_model_catalog(self.catalog_allowed) + self._create_model_catalog(self.catalog_denied) + self._add_user(self.granted_user) + self._create_reader_role() + self._grant_role_to_user() + + def _create_metalake(self) -> None: + self._post( + "/api/metalakes", + { + "name": self.metalake, + "comment": "MCP authz integration test metalake", + "properties": {}, + }, + ) + + def _create_model_catalog(self, name: str) -> None: + self._post( + f"/api/metalakes/{self.metalake}/catalogs", + { + "name": name, + "type": "MODEL", + 
"provider": "model", + "comment": "model catalog for authz test", + "properties": {}, + }, + ) + + def _add_user(self, user: str) -> None: + self._post( + f"/api/metalakes/{self.metalake}/users", + {"name": user}, + ) + + def _create_reader_role(self) -> None: + # Grant bob USE_CATALOG on the allowed catalog only. + self._post( + f"/api/metalakes/{self.metalake}/roles", + { + "name": self.role_name, + "properties": {}, + "securableObjects": [ + { + "fullName": self.catalog_allowed, + "type": "CATALOG", + "privileges": [ + {"name": "USE_CATALOG", "condition": "ALLOW"} + ], + } + ], + }, + ) + + def _grant_role_to_user(self) -> None: + self._put( + f"/api/metalakes/{self.metalake}/permissions" + f"/users/{self.granted_user}/grant/", + {"roleNames": [self.role_name]}, + ) + + def close(self) -> None: + self._client.close() diff --git a/mcp-server/tests/integration/test_authz_e2e.py b/mcp-server/tests/integration/test_authz_e2e.py new file mode 100644 index 0000000000..7c4933e62d --- /dev/null +++ b/mcp-server/tests/integration/test_authz_e2e.py @@ -0,0 +1,160 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""End-to-end authorization integration test through the live MCP HTTP server. 
+ +Validates the three demo acceptance moments against a real Gravitino with +authorization enabled: + + 1. Two principals run the same discovery call and get correctly different, + authorization-scoped results. + 2. A read-only principal attempts a write through MCP and is denied by + Gravitino authorization. + 3. Both the reads and the denied write appear as audit records attributed to + the correct principal. + +These tests only run when GRAVITINO_URI / MCP_URL / MCP_METALAKE are set (see +conftest.py); otherwise the suite is skipped. +""" + +import asyncio +import json +import os +import time + +import pytest +from fastmcp import Client +from fastmcp.client.transports import StreamableHttpTransport + +from tests.integration.gravitino_setup import basic_auth_header + +ADMIN = "admin" +BOB = "bob" +CATALOG_ALLOWED = "cat_allowed" +CATALOG_DENIED = "cat_denied" + + +def _client_for(principal: str, mcp_url: str) -> Client: + """Build an MCP client that authenticates as ``principal`` (simple auth).""" + transport = StreamableHttpTransport( + url=mcp_url, + headers={"Authorization": basic_auth_header(principal)}, + ) + return Client(transport) + + +async def _list_catalog_names(principal: str, mcp_url: str) -> set: + async with _client_for(principal, mcp_url) as client: + result = await client.call_tool("get_list_of_catalogs") + payload = json.loads(result.content[0].text) + return {entry["name"] for entry in payload} + + +def test_moment1_authorization_scoped_discovery( + gravitino_fixture, integration_env +): + """Moment 1: admin and bob get different, correctly scoped catalog lists.""" + mcp_url = integration_env["mcp_url"] + + admin_catalogs = asyncio.run(_list_catalog_names(ADMIN, mcp_url)) + bob_catalogs = asyncio.run(_list_catalog_names(BOB, mcp_url)) + + # Admin owns the metalake and sees both catalogs. + assert CATALOG_ALLOWED in admin_catalogs + assert CATALOG_DENIED in admin_catalogs + + # Bob was granted USE_CATALOG on the allowed catalog only. 
+ assert CATALOG_ALLOWED in bob_catalogs + assert CATALOG_DENIED not in bob_catalogs + + # The two principals must receive different results. + assert admin_catalogs != bob_catalogs + + +def test_moment2_write_denied_for_readonly_principal( + gravitino_fixture, integration_env +): + """Moment 2: bob (no write grant) is denied when creating a tag through MCP.""" + mcp_url = integration_env["mcp_url"] + + async def _attempt_write(): + async with _client_for(BOB, mcp_url) as client: + await client.call_tool( + "create_tag", + { + "name": "denied_tag", + "comment": "should be denied", + "properties": {}, + }, + ) + + with pytest.raises(Exception): # noqa: B017 – any tool error means denial + asyncio.run(_attempt_write()) + + +def test_moment3_audit_trail_attribution(gravitino_fixture, integration_env): + """Moment 3: audit log records reads/writes attributed to the right principal.""" + audit_log = os.environ.get("MCP_AUDIT_LOG") + if not audit_log or not os.path.exists(audit_log): + pytest.skip("MCP_AUDIT_LOG not set or file missing") + + mcp_url = integration_env["mcp_url"] + + async def _attempt_write(): + async with _client_for(BOB, mcp_url) as client: + await client.call_tool( + "create_tag", + {"name": "audit_tag", "comment": "", "properties": {}}, + ) + + # Generate one allowed read (admin) and one denied write (bob). + asyncio.run(_list_catalog_names(ADMIN, mcp_url)) + try: + asyncio.run(_attempt_write()) + except Exception: # noqa: BLE001 – expected denial + pass + + # Give the server a moment to flush the audit handler. 
+ time.sleep(1.0) + + records = [] + with open(audit_log, "r", encoding="utf-8") as fh: + for line in fh: + line = line.strip() + if line: + try: + records.append(json.loads(line)) + except json.JSONDecodeError: + continue + + admin_allows = [ + r + for r in records + if r.get("principal") == ADMIN + and r.get("tool") == "get_list_of_catalogs" + and r.get("outcome") == "allow" + ] + bob_denies = [ + r + for r in records + if r.get("principal") == BOB + and r.get("tool") == "create_tag" + and r.get("outcome") == "deny" + ] + + assert admin_allows, "expected an allow record attributed to admin" + assert bob_denies, "expected a deny record attributed to bob"
