jerryshao commented on code in PR #11558: URL: https://github.com/apache/gravitino/pull/11558#discussion_r3461667451
########## lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceHealthOperations.java: ########## @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.service.rest; + +import com.codahale.metrics.annotation.ResponseMetered; +import com.codahale.metrics.annotation.Timed; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import javax.inject.Inject; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.apache.gravitino.dto.HealthCheckDTO; +import org.apache.gravitino.dto.responses.HealthResponse; +import org.apache.gravitino.lance.common.ops.NamespaceWrapper; +import org.apache.gravitino.metrics.MetricNames; +import org.apache.gravitino.server.web.Utils; + +/** + * Health check endpoints for the Lance REST server. Follows the same MicroProfile Health semantics + * as the main Gravitino server. + * + * <ul> + * <li>{@code GET /lance/health/live} — liveness, 200 as long as the HTTP thread can respond + * <li>{@code GET /lance/health/ready} — readiness, 200 when the namespace wrapper is initialized + * <li>{@code GET /lance/health} — aggregate, 200 when both pass + * </ul> + * + * All endpoints return 503 with a JSON body describing the failed check(s) when unhealthy. + */ +@Path("/health") +@Produces(MediaType.APPLICATION_JSON) +public class LanceHealthOperations { + + private static final String CHECK_HTTP_SERVER = "httpServer"; + private static final String CHECK_NAMESPACE_WRAPPER = "namespaceWrapper"; + + @Inject private NamespaceWrapper namespaceWrapper; + + /** Default constructor for Jersey auto-discovery. */ + public LanceHealthOperations() {} + + /** + * Liveness probe. Returns 200 as long as the HTTP thread can respond. + * + * @return 200 OK with an UP {@link HealthResponse} + */ + @GET + @Path("/live") + @Timed(name = "lance.health.live." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "lance.health.live", absolute = true) + public Response live() { + HealthCheckDTO check = up(CHECK_HTTP_SERVER, Collections.<String, String>emptyMap()); Review Comment: **Minor: unnecessary explicit type witness on `Collections.emptyMap()`.** `Collections.<String, String>emptyMap()` — Java infers the type parameter from context, so the `<String, String>` witness is redundant. `IcebergHealthOperations` uses the inferred form (`Collections.emptyMap()`) throughout. Same applies to the identical calls on lines 105 and 124. ########## lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceAuthenticationFilter.java: ########## @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import javax.servlet.http.HttpServletResponse; +import org.apache.gravitino.exceptions.UnauthorizedException; +import org.apache.gravitino.server.authentication.AuthenticationFilter; +import org.lance.namespace.model.ErrorResponse; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An {@link AuthenticationFilter} subclass for the Lance REST server that: + * + * <ul> + * <li>allows health check endpoints to bypass authentication via {@link + * LanceHealthCheckPathMatcher}; + * <li>returns Lance-compatible JSON error responses on authentication failure instead of the + * default HTML error pages. + * </ul> + */ +public class LanceAuthenticationFilter extends AuthenticationFilter { + + private static final Logger LOG = LoggerFactory.getLogger(LanceAuthenticationFilter.class); + private static final ObjectMapper MAPPER = new ObjectMapper(); + + public LanceAuthenticationFilter() { + healthCheckMatcher = new LanceHealthCheckPathMatcher(); + } + + @Override + protected void sendAuthErrorResponse(HttpServletResponse response, Exception exception) + throws IOException { + int status; + String message; + if (exception instanceof UnauthorizedException) { + status = HttpServletResponse.SC_UNAUTHORIZED; + message = exception.getMessage(); + if (message == null || message.isEmpty()) { + message = "Authentication failed"; + } + } else { + status = HttpServletResponse.SC_INTERNAL_SERVER_ERROR; Review Comment: **Bug: `ForbiddenException` maps to 500 instead of 403.** The `else` branch catches every non-`UnauthorizedException` — including `ForbiddenException` — and returns 500. The base `AuthenticationFilter.sendAuthErrorResponse` has an explicit `ForbiddenException` → 403 branch. This override silently drops that case. Add an `else if (exception instanceof ForbiddenException)` branch mapping to `SC_FORBIDDEN` (403), and add a corresponding test to `TestLanceAuthenticationFilter`. ########## lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceAuthenticationFilter.java: ########## @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.service; + +import com.fasterxml.jackson.databind.ObjectMapper; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import javax.servlet.http.HttpServletResponse; +import org.apache.gravitino.exceptions.UnauthorizedException; +import org.apache.gravitino.server.authentication.AuthenticationFilter; +import org.lance.namespace.model.ErrorResponse; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * An {@link AuthenticationFilter} subclass for the Lance REST server that: + * + * <ul> + * <li>allows health check endpoints to bypass authentication via {@link + * LanceHealthCheckPathMatcher}; + * <li>returns Lance-compatible JSON error responses on authentication failure instead of the + * default HTML error pages. + * </ul> + */ +public class LanceAuthenticationFilter extends AuthenticationFilter { + + private static final Logger LOG = LoggerFactory.getLogger(LanceAuthenticationFilter.class); + private static final ObjectMapper MAPPER = new ObjectMapper(); Review Comment: **Style: Use the project-shared `ObjectMapper` instead of a raw `new ObjectMapper()`.** The base `AuthenticationFilter` uses `ObjectMapperProvider.objectMapper()`, which carries project-wide serialization config (module registration, null handling, date formats, etc.). `IcebergAuthenticationFilter` similarly uses `IcebergObjectMapper.getInstance()`. A bare `new ObjectMapper()` bypasses all of that — use whichever shared mapper is appropriate for the Lance serialization context. ########## lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java: ########## @@ -66,7 +69,13 @@ public void serviceInit(Map<String, String> properties, boolean auxMode) { LanceConfig lanceConfig = new LanceConfig(properties); JettyServerConfig serverConfig = JettyServerConfig.fromConfig(lanceConfig); Review Comment: **Minor: consider a named class instead of an anonymous `JettyServer` subclass.** Anonymous inner classes in field initializers are harder to test and document in isolation. Since `LanceAuthenticationFilter` already exists as a named class, a small named `LanceJettyServer` (even package-private) would be consistent with the rest of the module and easier to reference in tests or javadoc. ########## lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/LanceRESTService.java: ########## @@ -66,7 +69,13 @@ public void serviceInit(Map<String, String> properties, boolean auxMode) { LanceConfig lanceConfig = new LanceConfig(properties); JettyServerConfig serverConfig = JettyServerConfig.fromConfig(lanceConfig); - server = new JettyServer(); + server = Review Comment: **Style: FQN in method signature — use an import instead.** ```java protected javax.servlet.Filter createAuthenticationFilter() { ``` Per the project coding standards, FQNs inline in code should be replaced with a regular `import javax.servlet.Filter;` declaration. The return type should just be `Filter`. ########## lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/LanceHealthCheckPathMatcher.java: ########## @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.service; + +import org.apache.gravitino.server.web.HealthCheckPathMatcher; + +/** + * A {@link HealthCheckPathMatcher} for the Lance REST server that additionally recognises {@code + * /lance/health} and {@code /lance/health/*} as health check endpoints. + * + * <p>Pass an instance of this class to both {@link + * org.apache.gravitino.server.authentication.AuthenticationFilter} (via {@code + * LanceAuthenticationFilter}) and {@link org.apache.gravitino.server.web.HttpAuditFilter} when + * constructing the Lance REST server so that both filters agree on which paths are probe traffic. + */ +public class LanceHealthCheckPathMatcher extends HealthCheckPathMatcher { + + @Override + public boolean isHealthCheckPath(String path) { + if (super.isHealthCheckPath(path)) { + return true; + } + if (path == null) { Review Comment: **Minor: redundant `null` guard — `super.isHealthCheckPath(path)` already handles `null`.** `HealthCheckPathMatcher.isHealthCheckPath` returns `false` immediately when `path == null`, so `null` can never reach this second check. The guard is harmless but misleading — it implies `null` could slip through the `super` call. Safe to remove. ########## lance/lance-rest-server/src/main/java/org/apache/gravitino/lance/service/rest/LanceHealthOperations.java: ########## @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.gravitino.lance.service.rest; + +import com.codahale.metrics.annotation.ResponseMetered; +import com.codahale.metrics.annotation.Timed; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import javax.inject.Inject; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.Response; +import org.apache.gravitino.dto.HealthCheckDTO; +import org.apache.gravitino.dto.responses.HealthResponse; +import org.apache.gravitino.lance.common.ops.NamespaceWrapper; +import org.apache.gravitino.metrics.MetricNames; +import org.apache.gravitino.server.web.Utils; + +/** + * Health check endpoints for the Lance REST server. Follows the same MicroProfile Health semantics + * as the main Gravitino server. + * + * <ul> + * <li>{@code GET /lance/health/live} — liveness, 200 as long as the HTTP thread can respond + * <li>{@code GET /lance/health/ready} — readiness, 200 when the namespace wrapper is initialized + * <li>{@code GET /lance/health} — aggregate, 200 when both pass + * </ul> + * + * All endpoints return 503 with a JSON body describing the failed check(s) when unhealthy. + */ +@Path("/health") +@Produces(MediaType.APPLICATION_JSON) +public class LanceHealthOperations { + + private static final String CHECK_HTTP_SERVER = "httpServer"; + private static final String CHECK_NAMESPACE_WRAPPER = "namespaceWrapper"; + + @Inject private NamespaceWrapper namespaceWrapper; + + /** Default constructor for Jersey auto-discovery. */ + public LanceHealthOperations() {} + + /** + * Liveness probe. Returns 200 as long as the HTTP thread can respond. + * + * @return 200 OK with an UP {@link HealthResponse} + */ + @GET + @Path("/live") + @Timed(name = "lance.health.live." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "lance.health.live", absolute = true) + public Response live() { + HealthCheckDTO check = up(CHECK_HTTP_SERVER, Collections.<String, String>emptyMap()); + HealthResponse healthResponse = + new HealthResponse(HealthCheckDTO.Status.UP, Collections.singletonList(check)); + return Utils.ok(healthResponse); + } + + /** + * Readiness probe. Returns 200 when the {@link NamespaceWrapper} is initialized, 503 otherwise. + * + * @return 200 OK when ready, 503 Service Unavailable with a DOWN {@link HealthResponse} otherwise + */ + @GET + @Path("/ready") + @Timed(name = "lance.health.ready." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "lance.health.ready", absolute = true) + public Response ready() { + HealthCheckDTO namespaceCheck = checkNamespaceWrapper(); + HealthCheckDTO.Status overall = namespaceCheck.getStatus(); + HealthResponse body = new HealthResponse(overall, Collections.singletonList(namespaceCheck)); + return overall == HealthCheckDTO.Status.UP ? Utils.ok(body) : Utils.serviceUnavailable(body); + } + + /** + * Aggregate health check. Returns 200 when both liveness and readiness pass, 503 otherwise. + * + * @return 200 OK when healthy, 503 Service Unavailable with failing checks described in the body + */ + @GET + @Timed(name = "lance.health." + MetricNames.HTTP_PROCESS_DURATION, absolute = true) + @ResponseMetered(name = "lance.health", absolute = true) + public Response health() { + List<HealthCheckDTO> checks = new ArrayList<>(2); + checks.add(up(CHECK_HTTP_SERVER, Collections.<String, String>emptyMap())); + checks.add(checkNamespaceWrapper()); + + HealthCheckDTO.Status overall = + checks.stream().anyMatch(c -> c.getStatus() == HealthCheckDTO.Status.DOWN) + ? HealthCheckDTO.Status.DOWN + : HealthCheckDTO.Status.UP; + + HealthResponse body = new HealthResponse(overall, checks); + return overall == HealthCheckDTO.Status.UP ? Utils.ok(body) : Utils.serviceUnavailable(body); + } + + private HealthCheckDTO checkNamespaceWrapper() { + NamespaceWrapper wrapper = getNamespaceWrapper(); + if (wrapper == null) { + return down(CHECK_NAMESPACE_WRAPPER, "reason", "namespace wrapper not initialized"); + } + try { + wrapper.asNamespaceOps(); Review Comment: **Functional concern: readiness probe triggers lazy initialization and can block indefinitely.** ```java wrapper.asNamespaceOps(); // calls initIfNeeded() → synchronized initAll() → initialize() ``` `NamespaceWrapper.asNamespaceOps()` lazily initializes the backend (the comment in that class says "it may block the startup"). A Kubernetes readiness probe hitting `/ready` before initialization completes will block inside the `synchronized` block until `initAll()` finishes (or hangs), which can cause probe timeouts and false pod restarts. Compare with `IcebergHealthOperations.checkCatalogWrapperManager()` — it simply checks `if (getCatalogWrapperManager() == null)` with no side effects. `NamespaceWrapper` already has a `private volatile boolean initialized` field. The cleanest fix is to expose a `public boolean isInitialized()` accessor and use that here instead of calling `asNamespaceOps()`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
