This is an automated email from the ASF dual-hosted git repository.

aicam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/texera.git


The following commit(s) were added to refs/heads/master by this push:
     new f2214a1828 feat: switch single-file dataset downloads to use browser native downloads (#3621)
f2214a1828 is described below

commit f2214a1828370745fa4434a95f4b72cd96cd6c21
Author: Madison Lin <[email protected]>
AuthorDate: Sun Aug 10 19:35:34 2025 -0700

    feat: switch single-file dataset downloads to use browser native downloads (#3621)
    
    ### Purpose ###
    This pull request partially addresses issue #3404 by changing the
    download method of single-file dataset downloads to use browser native
    downloads. Using the browser to handle downloads enables the browser's
    file download UI, which includes a progress bar when downloading larger
    files.
    
    Previously, we generated a pre-signed URL for the file via the LakeFS
    API and downloaded it as a Blob. Now, we generate a pre-signed URL for
    the file via the S3 Gateway API, which offers additional flexibility
    (the download file name and content type can be specified), and
    programmatically click a link to that URL to trigger the browser's
    native download.
    
    ### Changes ###
    - added method to generate pre-signed URL via S3 Gateway API, with
    additional flexibility to specify file name and type
    - updated the backend to expose endpoints that return a pre-signed URL
    generated via the new method, and the frontend to click that URL
    directly for single-file dataset downloads
    - removed success and error notifications for single file dataset
    downloads (the success and error of a file download cannot be obtained
    when using browser native download)
    - removed the unit tests that covered the old Blob-based single-file
    download and its success/error notifications
    
    Changed Files:
    - file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
    - file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala
    - gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts
    - gui/src/app/dashboard/service/user/dataset/dataset.service.ts
    - gui/src/app/dashboard/service/user/download/download.service.spec.ts
    - gui/src/app/dashboard/service/user/download/download.service.ts
---
 .../texera/service/resource/DatasetResource.scala  | 114 ++++++++++++++++-----
 .../ics/texera/service/util/S3StorageClient.scala  |  67 +++++++++++-
 .../dataset-detail.component.ts                    |   3 +-
 .../service/user/dataset/dataset.service.ts        |  23 +++++
 .../service/user/download/download.service.spec.ts |  42 --------
 .../service/user/download/download.service.ts      |  13 +--
 6 files changed, 183 insertions(+), 79 deletions(-)

diff --git 
a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
 
b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
index 37e48f6631..55722395aa 100644
--- 
a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
+++ 
b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
@@ -195,6 +195,7 @@ object DatasetResource {
 class DatasetResource {
   private val ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE = "User has no access 
to this dataset"
   private val ERR_DATASET_VERSION_NOT_FOUND_MESSAGE = "The version of the 
dataset not found"
+  private val EXPIRATION_MINUTES = 5
 
   /**
     * Helper function to get the dataset from DB with additional information 
including user access privilege and owner email
@@ -564,6 +565,19 @@ class DatasetResource {
     generatePresignedResponse(encodedUrl, datasetName, commitHash, uid)
   }
 
+  @GET
+  @RolesAllowed(Array("REGULAR", "ADMIN"))
+  @Path("/presign-download-s3")
+  def getPresignedUrlWithS3(
+      @QueryParam("filePath") encodedUrl: String,
+      @QueryParam("datasetName") datasetName: String,
+      @QueryParam("commitHash") commitHash: String,
+      @Auth user: SessionUser
+  ): Response = {
+    val uid = user.getUid
+    generatePresignedResponseWithS3(encodedUrl, datasetName, commitHash, uid)
+  }
+
   @GET
   @Path("/public-presign-download")
   def getPublicPresignedUrl(
@@ -571,9 +585,17 @@ class DatasetResource {
       @QueryParam("datasetName") datasetName: String,
       @QueryParam("commitHash") commitHash: String
   ): Response = {
-    val user = new SessionUser(new User())
-    val uid = user.getUid
-    generatePresignedResponse(encodedUrl, datasetName, commitHash, uid)
+    generatePresignedResponse(encodedUrl, datasetName, commitHash, null)
+  }
+
+  @GET
+  @Path("/public-presign-download-s3")
+  def getPublicPresignedUrlWithS3(
+      @QueryParam("filePath") encodedUrl: String,
+      @QueryParam("datasetName") datasetName: String,
+      @QueryParam("commitHash") commitHash: String
+  ): Response = {
+    generatePresignedResponseWithS3(encodedUrl, datasetName, commitHash, null)
   }
 
   @DELETE
@@ -1200,34 +1222,83 @@ class DatasetResource {
       commitHash: String,
       uid: Integer
   ): Response = {
+    resolveDatasetAndPath(encodedUrl, datasetName, commitHash, uid) match {
+      case Left(errorResponse) =>
+        errorResponse
+
+      case Right((resolvedDatasetName, resolvedCommitHash, resolvedFilePath)) 
=>
+        val url = LakeFSStorageClient.getFilePresignedUrl(
+          resolvedDatasetName,
+          resolvedCommitHash,
+          resolvedFilePath
+        )
+
+        Response.ok(Map("presignedUrl" -> url)).build()
+    }
+  }
+
+  private def generatePresignedResponseWithS3(
+      encodedUrl: String,
+      datasetName: String,
+      commitHash: String,
+      uid: Integer
+  ): Response = {
+    resolveDatasetAndPath(encodedUrl, datasetName, commitHash, uid) match {
+      case Left(errorResponse) =>
+        errorResponse
+
+      case Right((resolvedDatasetName, resolvedCommitHash, resolvedFilePath)) 
=>
+        val fileName = 
resolvedFilePath.split("/").lastOption.getOrElse("download")
+        val contentType = "application/octet-stream"
+        val url = S3StorageClient.getFilePresignedUrl(
+          resolvedDatasetName,
+          resolvedCommitHash,
+          resolvedFilePath,
+          fileName,
+          contentType,
+          EXPIRATION_MINUTES
+        )
+
+        Response.ok(Map("presignedUrl" -> url)).build()
+    }
+  }
+
+  private def resolveDatasetAndPath(
+      encodedUrl: String,
+      datasetName: String,
+      commitHash: String,
+      uid: Integer
+  ): Either[Response, (String, String, String)] = {
     val decodedPathStr = URLDecoder.decode(encodedUrl, 
StandardCharsets.UTF_8.name())
 
     (Option(datasetName), Option(commitHash)) match {
       case (Some(_), None) | (None, Some(_)) =>
         // Case 1: Only one parameter is provided (error case)
-        Response
-          .status(Response.Status.BAD_REQUEST)
-          .entity(
-            "Both datasetName and commitHash must be provided together, or 
neither should be provided."
-          )
-          .build()
+        Left(
+          Response
+            .status(Response.Status.BAD_REQUEST)
+            .entity(
+              "Both datasetName and commitHash must be provided together, or 
neither should be provided."
+            )
+            .build()
+        )
 
       case (Some(dsName), Some(commit)) =>
         // Case 2: datasetName and commitHash are provided, validate access
-        withTransaction(context) { ctx =>
+        val response = withTransaction(context) { ctx =>
           val datasetDao = new DatasetDao(ctx.configuration())
           val datasets = datasetDao.fetchByName(dsName).asScala.toList
 
           if (datasets.isEmpty || !userHasReadAccess(ctx, 
datasets.head.getDid, uid))
             throw new 
ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE)
 
-          val url = LakeFSStorageClient.getFilePresignedUrl(dsName, commit, 
decodedPathStr)
-          Response.ok(Map("presignedUrl" -> url)).build()
+          (dsName, commit, decodedPathStr)
         }
+        Right(response)
 
       case (None, None) =>
         // Case 3: Neither datasetName nor commitHash are provided, resolve 
normally
-        withTransaction(context) { ctx =>
+        val response = withTransaction(context) { ctx =>
           val fileUri = FileResolver.resolve(decodedPathStr)
           val document = 
DocumentFactory.openReadonlyDocument(fileUri).asInstanceOf[OnDataset]
           val datasetDao = new DatasetDao(ctx.configuration())
@@ -1236,18 +1307,13 @@ class DatasetResource {
           if (datasets.isEmpty || !userHasReadAccess(ctx, 
datasets.head.getDid, uid))
             throw new 
ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE)
 
-          Response
-            .ok(
-              Map(
-                "presignedUrl" -> LakeFSStorageClient.getFilePresignedUrl(
-                  document.getDatasetName(),
-                  document.getVersionHash(),
-                  document.getFileRelativePath()
-                )
-              )
-            )
-            .build()
+          (
+            document.getDatasetName(),
+            document.getVersionHash(),
+            document.getFileRelativePath()
+          )
         }
+        Right(response)
     }
   }
 }
diff --git 
a/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala
 
b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala
index 2b1afd1165..ae110fec52 100644
--- 
a/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala
+++ 
b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala
@@ -24,7 +24,12 @@ import 
software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCrede
 import software.amazon.awssdk.regions.Region
 import software.amazon.awssdk.services.s3.{S3Client, S3Configuration}
 import software.amazon.awssdk.services.s3.model._
+import software.amazon.awssdk.services.s3.presigner.S3Presigner
+import 
software.amazon.awssdk.services.s3.presigner.model.GetObjectPresignRequest
+import software.amazon.awssdk.services.s3.model.GetObjectRequest
 
+import java.net.URI
+import java.time.Duration
 import java.security.MessageDigest
 import scala.jdk.CollectionConverters._
 
@@ -36,10 +41,10 @@ import scala.jdk.CollectionConverters._
 object S3StorageClient {
   val MINIMUM_NUM_OF_MULTIPART_S3_PART: Long = 5L * 1024 * 1024 // 5 MiB
   val MAXIMUM_NUM_OF_MULTIPART_S3_PARTS = 10_000
+  val credentials = AwsBasicCredentials.create(StorageConfig.s3Username, 
StorageConfig.s3Password)
 
   // Initialize MinIO-compatible S3 Client
   private lazy val s3Client: S3Client = {
-    val credentials = AwsBasicCredentials.create(StorageConfig.s3Username, 
StorageConfig.s3Password)
     S3Client
       .builder()
       .credentialsProvider(StaticCredentialsProvider.create(credentials))
@@ -51,6 +56,29 @@ object S3StorageClient {
       .build()
   }
 
+  // Initialize S3-compatible presigner for LakeFS S3 Gateway
+  private lazy val s3Presigner: S3Presigner = {
+    val fullUri = new URI(StorageConfig.lakefsEndpoint)
+    val baseUri = new URI(
+      fullUri.getScheme,
+      null,
+      fullUri.getHost,
+      fullUri.getPort,
+      null,
+      null,
+      null
+    ) // Extract just the base (scheme + host + port)
+    S3Presigner
+      .builder()
+      .credentialsProvider(StaticCredentialsProvider.create(credentials))
+      .region(Region.of(StorageConfig.s3Region))
+      .endpointOverride(baseUri) // LakeFS base URL ("http://localhost:8000" on local)
+      .serviceConfiguration(
+        S3Configuration.builder().pathStyleAccessEnabled(true).build()
+      )
+      .build()
+  }
+
   /**
     * Checks if a directory (prefix) exists within an S3 bucket.
     *
@@ -139,4 +167,41 @@ object S3StorageClient {
       s3Client.deleteObjects(deleteObjectsRequest)
     }
   }
+
+  /**
+    * Generates a presigned URL for downloading a file at a specific commit and path.
+    *
+    * @param repoName            Repository name.
+    * @param commitHash          Commit hash of the version.
+    * @param filePath            Path to the file in the repository.
+    * @param fileName            Name of the file downloaded via the presigned 
URL.
+    * @param contentType         Type of the file downloaded via the presigned 
URL.
+    * @param expirationMinutes   Duration in minutes that the presigned URL is 
valid.
+    */
+  def getFilePresignedUrl(
+      repoName: String,
+      commitHash: String,
+      filePath: String,
+      fileName: String,
+      contentType: String,
+      expirationMinutes: Long
+  ): String = {
+    val getObjectRequest = GetObjectRequest
+      .builder()
+      .bucket(repoName)
+      .key(s"$commitHash/$filePath")
+      .responseContentDisposition(s"attachment; filename='$fileName'")
+      .responseContentType(contentType)
+      .build()
+
+    val presignRequest = GetObjectPresignRequest
+      .builder()
+      .signatureDuration(Duration.ofMinutes(expirationMinutes))
+      .getObjectRequest(getObjectRequest)
+      .build()
+
+    val presignedUrl = 
s3Presigner.presignGetObject(presignRequest).url().toString
+    s3Presigner.close()
+    presignedUrl
+  }
 }
diff --git 
a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts
 
b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts
index 9dd4de0c40..fb40c946bc 100644
--- 
a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts
+++ 
b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts
@@ -269,8 +269,7 @@ export class DatasetDetailComponent implements OnInit {
 
   onClickDownloadCurrentFile = (): void => {
     if (!this.did || !this.selectedVersion?.dvid) return;
-
-    
this.downloadService.downloadSingleFile(this.currentDisplayedFileName).pipe(untilDestroyed(this)).subscribe();
+    this.downloadService.downloadSingleFile(this.currentDisplayedFileName);
   };
 
   onClickScaleTheView() {
diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts 
b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts
index 226b1edb97..333be35fa3 100644
--- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts
+++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts
@@ -93,6 +93,29 @@ export class DatasetService {
       .pipe(switchMap(({ presignedUrl }) => this.http.get(presignedUrl, { 
responseType: "blob" })));
   }
 
+  /**
+   * Retrieves a single file from a dataset version using a pre-signed URL.
+   * @param filePath Relative file path within the dataset.
+   * @param isLogin Determine whether a user is currently logged in
+   * @returns void File is downloaded natively by the browser.
+   */
+  public retrieveDatasetVersionSingleFileViaBrowser(filePath: string, isLogin: 
boolean = true): void {
+    const endpointSegment = isLogin ? "presign-download-s3" : 
"public-presign-download-s3";
+    const endpoint = 
`${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${endpointSegment}?filePath=${encodeURIComponent(filePath)}`;
+
+    this.http.get<{ presignedUrl: string }>(endpoint).subscribe({
+      next: response => {
+        const presignedUrl = response.presignedUrl;
+        const downloadUrl = document.createElement("a");
+
+        downloadUrl.href = presignedUrl;
+        document.body.appendChild(downloadUrl);
+        downloadUrl.click();
+        downloadUrl.remove();
+      },
+    });
+  }
+
   /**
    * Retrieves a zip file of a dataset version.
    * @param did Dataset ID
diff --git 
a/core/gui/src/app/dashboard/service/user/download/download.service.spec.ts 
b/core/gui/src/app/dashboard/service/user/download/download.service.spec.ts
index f6d5a4b4b1..d1c55a434f 100644
--- a/core/gui/src/app/dashboard/service/user/download/download.service.spec.ts
+++ b/core/gui/src/app/dashboard/service/user/download/download.service.spec.ts
@@ -60,48 +60,6 @@ describe("DownloadService", () => {
     notificationServiceSpy = TestBed.inject(NotificationService) as 
jasmine.SpyObj<NotificationService>;
   });
 
-  it("should download a single file successfully", (done: DoneFn) => {
-    const filePath = "test/file.txt";
-    const mockBlob = new Blob(["test content"], { type: "text/plain" });
-
-    
datasetServiceSpy.retrieveDatasetVersionSingleFile.and.returnValue(of(mockBlob));
-
-    downloadService.downloadSingleFile(filePath).subscribe({
-      next: blob => {
-        expect(blob).toBe(mockBlob);
-        expect(notificationServiceSpy.info).toHaveBeenCalledWith("Starting to 
download file test/file.txt");
-        
expect(datasetServiceSpy.retrieveDatasetVersionSingleFile).toHaveBeenCalledWith(filePath);
-        expect(fileSaverServiceSpy.saveAs).toHaveBeenCalledWith(mockBlob, 
"file.txt");
-        expect(notificationServiceSpy.success).toHaveBeenCalledWith("File 
test/file.txt has been downloaded");
-        done();
-      },
-      error: (error: unknown) => {
-        fail("Should not have thrown an error: " + error);
-      },
-    });
-  });
-
-  it("should handle download failure correctly", done => {
-    const filePath = "test/file.txt";
-    const errorMessage = "Download failed";
-
-    
datasetServiceSpy.retrieveDatasetVersionSingleFile.and.returnValue(throwError(()
 => new Error(errorMessage)));
-
-    downloadService.downloadSingleFile(filePath).subscribe({
-      next: () => {
-        fail("Should have thrown an error");
-      },
-      error: (error: unknown) => {
-        expect(error).toBeTruthy();
-        expect(notificationServiceSpy.info).toHaveBeenCalledWith("Starting to 
download file test/file.txt");
-        
expect(datasetServiceSpy.retrieveDatasetVersionSingleFile).toHaveBeenCalledWith(filePath);
-        expect(fileSaverServiceSpy.saveAs).not.toHaveBeenCalled();
-        expect(notificationServiceSpy.error).toHaveBeenCalledWith("Error 
downloading file 'test/file.txt'");
-        done();
-      },
-    });
-  });
-
   it("should download a dataset successfully", done => {
     const datasetId = 1;
     const datasetName = "TestDataset";
diff --git 
a/core/gui/src/app/dashboard/service/user/download/download.service.ts 
b/core/gui/src/app/dashboard/service/user/download/download.service.ts
index 813dea08e2..39b89f0f01 100644
--- a/core/gui/src/app/dashboard/service/user/download/download.service.ts
+++ b/core/gui/src/app/dashboard/service/user/download/download.service.ts
@@ -93,16 +93,9 @@ export class DownloadService {
     );
   }
 
-  downloadSingleFile(filePath: string): Observable<Blob> {
-    const DEFAULT_FILE_NAME = "download";
-    const fileName = filePath.split("/").pop() || DEFAULT_FILE_NAME;
-    return this.downloadWithNotification(
-      () => this.datasetService.retrieveDatasetVersionSingleFile(filePath),
-      fileName,
-      `Starting to download file ${filePath}`,
-      `File ${filePath} has been downloaded`,
-      `Error downloading file '${filePath}'`
-    );
+  downloadSingleFile(filePath: string): void {
+    this.notificationService.info(`Starting to download file ${filePath}`);
+    this.datasetService.retrieveDatasetVersionSingleFileViaBrowser(filePath);
   }
 
   downloadWorkflowsAsZip(workflowEntries: Array<{ id: number; name: string 
}>): Observable<Blob> {

Reply via email to