This is an automated email from the ASF dual-hosted git repository.

github-merge-queue[bot] pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/main by this push:
     new 7b7a5c69b8 fix(auth): enforce @RolesAllowed in file-service by 
registering RolesAllowedDynamicFeature (#5198)
7b7a5c69b8 is described below

commit 7b7a5c69b83c88caee17b34b9029c7575bebe143
Author: Matthew B. <[email protected]>
AuthorDate: Tue Jun 9 14:13:47 2026 -0700

    fix(auth): enforce @RolesAllowed in file-service by registering 
RolesAllowedDynamicFeature (#5198)
    
    ### What changes were proposed in this PR?
    
    Registers Jersey's `RolesAllowedDynamicFeature` in `file-service`'s
    Jersey environment so the `@RolesAllowed` / `@PermitAll` annotations
    already present on `DatasetResource` are actually enforced.
    
    `file-service` registered `JwtAuthFilter` (authentication) and the
    `AuthValueFactoryProvider.Binder` (so `@Auth user` injects), but never
    `RolesAllowedDynamicFeature` (authorization). The ~20
    `@RolesAllowed(Array("REGULAR", "ADMIN"))` annotations on
    `DatasetResource` were therefore decorative: any valid JWT reached the
    resource method regardless of role. This adds the one missing
    registration.
    
    ```scala
    // Enforce @RolesAllowed annotations on resource methods
    environment.jersey.register(classOf[RolesAllowedDynamicFeature])
    ```
    
    Once enforcement is live, the six public-dataset endpoints
    (`getPublicPresignedUrl`, `getPublicPresignedUrlWithS3`,
    `getPublicDatasetVersionList`,
    `retrievePublicDatasetVersionRootFileNodes`, `getPublicDataset`,
    `getDatasetCover`) must stay reachable without a JWT for anonymous hub
    visitors. They are already `@PermitAll` on `main`; the new spec pins
    that so a future refactor cannot silently lock them out.
    
    ### Scope note
    
    This PR was originally "complete @RolesAllowed enforcement across
    microservices." Since then the rest of that work has landed or is in
    flight on `main`, so this PR has been narrowed to the one remaining gap:
    
    - `config-service`, `computing-unit-managing-service`,
    `workflow-compiling-service`: done in #5049 / #5199.
    - `JwtAuthFilter` priority + eager-401 + `@PermitAll` opt-out: done in
    #5404.
    - `access-control-service` (LiteLLM proxy hardening + feature
    registration): handled by #5421 (@Yicong-Huang).
    - **`file-service`: this PR.**
    
    With this and #5421, every microservice that uses `@RolesAllowed`
    enforces it, closing out #4904.
    
    ### Any related issues, documentation, or discussions?
    
    Closes #5433. Follow-up to #4904; companion to #5421.
    
    ### How was this PR tested?
    
    Added `DatasetResourcePermissionsSpec`, which verifies the six
    public-dataset endpoints carry `@PermitAll` so they remain
    anonymous-accessible after enforcement is enabled. `file-service` and
    the auth modules compile clean. Existing `DatasetResource`
    `@RolesAllowed` annotations are unchanged.
    
    ### Was this PR authored or co-authored using generative AI tooling?
    
    Co-authored with Claude Opus 4.8 in compliance with ASF Generative
    Tooling Guidance.
    
    ---------
    
    Co-authored-by: Yicong Huang 
<[email protected]>
---
 .../org/apache/texera/service/FileService.scala    |   4 +
 .../resource/DatasetResourcePermissionsSpec.scala  | 106 +++++++++++++++++++++
 2 files changed, 110 insertions(+)

diff --git 
a/file-service/src/main/scala/org/apache/texera/service/FileService.scala 
b/file-service/src/main/scala/org/apache/texera/service/FileService.scala
index 64a2f64eba..88f7650378 100644
--- a/file-service/src/main/scala/org/apache/texera/service/FileService.scala
+++ b/file-service/src/main/scala/org/apache/texera/service/FileService.scala
@@ -45,6 +45,7 @@ import org.apache.texera.service.resource.{
 import org.apache.texera.service.util.S3StorageClient
 import org.apache.texera.service.util.LargeBinaryManager
 import org.eclipse.jetty.server.session.SessionHandler
+import org.glassfish.jersey.server.filter.RolesAllowedDynamicFeature
 import java.nio.file.Path
 
 class FileService extends Application[FileServiceConfiguration] with 
LazyLogging {
@@ -95,6 +96,9 @@ class FileService extends 
Application[FileServiceConfiguration] with LazyLogging
       new 
io.dropwizard.auth.AuthValueFactoryProvider.Binder(classOf[SessionUser])
     )
 
+    // Enforce @RolesAllowed annotations on resource methods
+    environment.jersey.register(classOf[RolesAllowedDynamicFeature])
+
     environment.jersey.register(classOf[DatasetResource])
     environment.jersey.register(classOf[DatasetAccessResource])
 
diff --git 
a/file-service/src/test/scala/org/apache/texera/service/resource/DatasetResourcePermissionsSpec.scala
 
b/file-service/src/test/scala/org/apache/texera/service/resource/DatasetResourcePermissionsSpec.scala
new file mode 100644
index 0000000000..433f218389
--- /dev/null
+++ 
b/file-service/src/test/scala/org/apache/texera/service/resource/DatasetResourcePermissionsSpec.scala
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.texera.service.resource
+
+import jakarta.annotation.security.{PermitAll, RolesAllowed}
+import jakarta.ws.rs.HttpMethod
+import org.scalatest.flatspec.AnyFlatSpec
+import org.scalatest.matchers.should.Matchers
+
+import java.lang.reflect.Method
+
+// FileService registers RolesAllowedDynamicFeature, so every DatasetResource
+// HTTP endpoint is now enforced by its own method-level annotation. A method
+// with neither @PermitAll nor @RolesAllowed defaults to OPEN, so the contract
+// these tests pin is: every endpoint carries exactly one of the two, the
+// non-public ones require REGULAR/ADMIN, and the public hub reads stay
+// anonymous-accessible.
+class DatasetResourcePermissionsSpec extends AnyFlatSpec with Matchers {
+
+  // A JAX-RS endpoint is any public method carrying an HTTP-method annotation
+  // (@GET/@POST/@PUT/@DELETE/...), each of which is meta-annotated 
@HttpMethod.
+  // Scanning by the meta-annotation avoids hard-coding the verb list.
+  private val endpointMethods: Seq[Method] =
+    classOf[DatasetResource].getMethods.toSeq
+      
.filter(_.getAnnotations.exists(_.annotationType.isAnnotationPresent(classOf[HttpMethod])))
+
+  private def isPermitAll(m: Method): Boolean = 
m.getAnnotation(classOf[PermitAll]) != null
+  private def rolesOf(m: Method): Option[RolesAllowed] =
+    Option(m.getAnnotation(classOf[RolesAllowed]))
+
+  // Public read endpoints for anonymous hub visitors browsing public datasets.
+  // These MUST stay @PermitAll or the public hub breaks for logged-out users.
+  private val publicEndpointMethods: Set[String] = Set(
+    "getPublicPresignedUrl",
+    "getPublicPresignedUrlWithS3",
+    "getPublicDatasetVersionList",
+    "retrievePublicDatasetVersionRootFileNodes",
+    "getPublicDataset",
+    "getDatasetCover",
+    "getDatasetCoverUrl"
+  )
+
+  "DatasetResource" should "expose HTTP endpoints (sanity check for the 
reflection scan)" in {
+    endpointMethods should not be empty
+  }
+
+  it should "annotate every HTTP endpoint with exactly one of @PermitAll or 
@RolesAllowed (none defaults to open)" in {
+    endpointMethods.foreach { m =>
+      withClue(s"${m.getName}: ") {
+        (isPermitAll(m), rolesOf(m).isDefined) match {
+          case (true, true) => fail("carries both @PermitAll and 
@RolesAllowed")
+          case (false, false) =>
+            fail("carries neither @PermitAll nor @RolesAllowed (would default 
to open)")
+          case _ => succeed
+        }
+      }
+    }
+  }
+
+  it should "guard every non-public endpoint with @RolesAllowed(REGULAR, 
ADMIN)" in {
+    val nonPublic = endpointMethods.filterNot(m => 
publicEndpointMethods.contains(m.getName))
+    nonPublic should not be empty
+    nonPublic.foreach { m =>
+      withClue(s"${m.getName}: ") {
+        isPermitAll(m) shouldBe false
+        val roles = rolesOf(m)
+        roles should not be empty
+        roles.get.value() should contain theSameElementsAs Array("REGULAR", 
"ADMIN")
+      }
+    }
+  }
+
+  it should "keep the public dataset endpoints @PermitAll for unauthenticated 
visitors" in {
+    // Every documented public endpoint must still exist (guards against 
renames
+    // that would silently drop a name from the non-public check above).
+    val present = 
endpointMethods.map(_.getName).toSet.intersect(publicEndpointMethods)
+    withClue("public endpoints missing from DatasetResource (renamed?): ") {
+      present shouldBe publicEndpointMethods
+    }
+    endpointMethods
+      .filter(m => publicEndpointMethods.contains(m.getName))
+      .foreach { m =>
+        withClue(s"${m.getName} must be @PermitAll: ") {
+          isPermitAll(m) shouldBe true
+          rolesOf(m) shouldBe empty
+        }
+      }
+  }
+}

Reply via email to