This is an automated email from the ASF dual-hosted git repository.
aicam pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/texera.git
The following commit(s) were added to refs/heads/master by this push:
new f2214a1828 feat: switch single-file dataset downloads to use browser native downloads (#3621)
f2214a1828 is described below
commit f2214a1828370745fa4434a95f4b72cd96cd6c21
Author: Madison Lin <[email protected]>
AuthorDate: Sun Aug 10 19:35:34 2025 -0700
feat: switch single-file dataset downloads to use browser native downloads (#3621)
### Purpose ###
This pull request partially addresses issue #3404 by switching single-file dataset downloads to browser-native downloads. Letting the browser handle the download enables its built-in download UI, including a progress bar when downloading larger files.
Previously, we generated a pre-signed URL for the file via the LakeFS API and downloaded it as a Blob. Now, we generate a pre-signed URL via the S3 Gateway API, which offers additional flexibility (e.g., specifying the response file name and content type), and programmatically click the URL to trigger a browser-native download, as sketched below.
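For reference, a minimal TypeScript sketch of the browser-native trigger (the helper name is illustrative; the pre-signed URL is assumed to have already been fetched from the new endpoint):
    // Hand a pre-signed URL to the browser so its native download UI takes over.
    function triggerBrowserDownload(presignedUrl: string): void {
      const anchor = document.createElement("a");
      anchor.href = presignedUrl;        // pre-signed GET URL from the S3 Gateway
      document.body.appendChild(anchor); // some browsers require the element to be in the DOM
      anchor.click();                    // starts the browser's native download (progress bar, etc.)
      anchor.remove();                   // clean up the temporary element
    }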
### Changes ###
- added a method to generate pre-signed URLs via the S3 Gateway API, with the flexibility to specify the response file name and content type
- updated the frontend and backend so that single-file dataset downloads directly click a pre-signed URL generated via the new method
- removed the success and error notifications for single-file dataset downloads (whether a browser-native download succeeds or fails cannot be observed by the application)
- updated the unit tests to verify the new download method (a minimal sketch follows this list)
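For illustration, a minimal Jasmine sketch of how the updated unit test can cover the new path, assuming spies on the dataset and notification services as in the existing spec file (the test name and expectations are illustrative, not the exact test in this PR):
    it("should delegate single-file downloads to the browser-native method", () => {
      const filePath = "test/file.txt";
      downloadService.downloadSingleFile(filePath);
      expect(notificationServiceSpy.info).toHaveBeenCalledWith(`Starting to download file ${filePath}`);
      expect(datasetServiceSpy.retrieveDatasetVersionSingleFileViaBrowser).toHaveBeenCalledWith(filePath);
    });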
Changed Files:
- file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
- file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala
- gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts
- gui/src/app/dashboard/service/user/dataset/dataset.service.ts
- gui/src/app/dashboard/service/user/download/download.service.spec.ts
- gui/src/app/dashboard/service/user/download/download.service.ts
---
.../texera/service/resource/DatasetResource.scala | 114 ++++++++++++++++-----
.../ics/texera/service/util/S3StorageClient.scala | 67 +++++++++++-
.../dataset-detail.component.ts | 3 +-
.../service/user/dataset/dataset.service.ts | 23 +++++
.../service/user/download/download.service.spec.ts | 42 --------
.../service/user/download/download.service.ts | 13 +--
6 files changed, 183 insertions(+), 79 deletions(-)
diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
index 37e48f6631..55722395aa 100644
--- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
+++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/resource/DatasetResource.scala
@@ -195,6 +195,7 @@ object DatasetResource {
class DatasetResource {
private val ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE = "User has no access to this dataset"
private val ERR_DATASET_VERSION_NOT_FOUND_MESSAGE = "The version of the dataset not found"
+ private val EXPIRATION_MINUTES = 5
/**
* Helper function to get the dataset from DB with additional information including user access privilege and owner email
@@ -564,6 +565,19 @@ class DatasetResource {
generatePresignedResponse(encodedUrl, datasetName, commitHash, uid)
}
+ @GET
+ @RolesAllowed(Array("REGULAR", "ADMIN"))
+ @Path("/presign-download-s3")
+ def getPresignedUrlWithS3(
+ @QueryParam("filePath") encodedUrl: String,
+ @QueryParam("datasetName") datasetName: String,
+ @QueryParam("commitHash") commitHash: String,
+ @Auth user: SessionUser
+ ): Response = {
+ val uid = user.getUid
+ generatePresignedResponseWithS3(encodedUrl, datasetName, commitHash, uid)
+ }
+
@GET
@Path("/public-presign-download")
def getPublicPresignedUrl(
@@ -571,9 +585,17 @@ class DatasetResource {
@QueryParam("datasetName") datasetName: String,
@QueryParam("commitHash") commitHash: String
): Response = {
- val user = new SessionUser(new User())
- val uid = user.getUid
- generatePresignedResponse(encodedUrl, datasetName, commitHash, uid)
+ generatePresignedResponse(encodedUrl, datasetName, commitHash, null)
+ }
+
+ @GET
+ @Path("/public-presign-download-s3")
+ def getPublicPresignedUrlWithS3(
+ @QueryParam("filePath") encodedUrl: String,
+ @QueryParam("datasetName") datasetName: String,
+ @QueryParam("commitHash") commitHash: String
+ ): Response = {
+ generatePresignedResponseWithS3(encodedUrl, datasetName, commitHash, null)
}
@DELETE
@@ -1200,34 +1222,83 @@ class DatasetResource {
commitHash: String,
uid: Integer
): Response = {
+ resolveDatasetAndPath(encodedUrl, datasetName, commitHash, uid) match {
+ case Left(errorResponse) =>
+ errorResponse
+
+ case Right((resolvedDatasetName, resolvedCommitHash, resolvedFilePath)) =>
+ val url = LakeFSStorageClient.getFilePresignedUrl(
+ resolvedDatasetName,
+ resolvedCommitHash,
+ resolvedFilePath
+ )
+
+ Response.ok(Map("presignedUrl" -> url)).build()
+ }
+ }
+
+ private def generatePresignedResponseWithS3(
+ encodedUrl: String,
+ datasetName: String,
+ commitHash: String,
+ uid: Integer
+ ): Response = {
+ resolveDatasetAndPath(encodedUrl, datasetName, commitHash, uid) match {
+ case Left(errorResponse) =>
+ errorResponse
+
+ case Right((resolvedDatasetName, resolvedCommitHash, resolvedFilePath)) =>
+ val fileName = resolvedFilePath.split("/").lastOption.getOrElse("download")
+ val contentType = "application/octet-stream"
+ val url = S3StorageClient.getFilePresignedUrl(
+ resolvedDatasetName,
+ resolvedCommitHash,
+ resolvedFilePath,
+ fileName,
+ contentType,
+ EXPIRATION_MINUTES
+ )
+
+ Response.ok(Map("presignedUrl" -> url)).build()
+ }
+ }
+
+ private def resolveDatasetAndPath(
+ encodedUrl: String,
+ datasetName: String,
+ commitHash: String,
+ uid: Integer
+ ): Either[Response, (String, String, String)] = {
val decodedPathStr = URLDecoder.decode(encodedUrl, StandardCharsets.UTF_8.name())
(Option(datasetName), Option(commitHash)) match {
case (Some(_), None) | (None, Some(_)) =>
// Case 1: Only one parameter is provided (error case)
- Response
- .status(Response.Status.BAD_REQUEST)
- .entity(
- "Both datasetName and commitHash must be provided together, or
neither should be provided."
- )
- .build()
+ Left(
+ Response
+ .status(Response.Status.BAD_REQUEST)
+ .entity(
+ "Both datasetName and commitHash must be provided together, or
neither should be provided."
+ )
+ .build()
+ )
case (Some(dsName), Some(commit)) =>
// Case 2: datasetName and commitHash are provided, validate access
- withTransaction(context) { ctx =>
+ val response = withTransaction(context) { ctx =>
val datasetDao = new DatasetDao(ctx.configuration())
val datasets = datasetDao.fetchByName(dsName).asScala.toList
if (datasets.isEmpty || !userHasReadAccess(ctx, datasets.head.getDid, uid))
throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE)
- val url = LakeFSStorageClient.getFilePresignedUrl(dsName, commit, decodedPathStr)
- Response.ok(Map("presignedUrl" -> url)).build()
+ (dsName, commit, decodedPathStr)
}
+ Right(response)
case (None, None) =>
// Case 3: Neither datasetName nor commitHash are provided, resolve normally
- withTransaction(context) { ctx =>
+ val response = withTransaction(context) { ctx =>
val fileUri = FileResolver.resolve(decodedPathStr)
val document =
DocumentFactory.openReadonlyDocument(fileUri).asInstanceOf[OnDataset]
val datasetDao = new DatasetDao(ctx.configuration())
@@ -1236,18 +1307,13 @@ class DatasetResource {
if (datasets.isEmpty || !userHasReadAccess(ctx, datasets.head.getDid, uid))
throw new ForbiddenException(ERR_USER_HAS_NO_ACCESS_TO_DATASET_MESSAGE)
- Response
- .ok(
- Map(
- "presignedUrl" -> LakeFSStorageClient.getFilePresignedUrl(
- document.getDatasetName(),
- document.getVersionHash(),
- document.getFileRelativePath()
- )
- )
- )
- .build()
+ (
+ document.getDatasetName(),
+ document.getVersionHash(),
+ document.getFileRelativePath()
+ )
}
+ Right(response)
}
}
}
diff --git a/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala
index 2b1afd1165..ae110fec52 100644
--- a/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala
+++ b/core/file-service/src/main/scala/edu/uci/ics/texera/service/util/S3StorageClient.scala
@@ -24,7 +24,12 @@ import software.amazon.awssdk.auth.credentials.{AwsBasicCredentials, StaticCrede
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.services.s3.{S3Client, S3Configuration}
import software.amazon.awssdk.services.s3.model._
+import software.amazon.awssdk.services.s3.presigner.S3Presigner
+import software.amazon.awssdk.services.s3.presigner.model.GetObjectPresignRequest
+import software.amazon.awssdk.services.s3.model.GetObjectRequest
+import java.net.URI
+import java.time.Duration
import java.security.MessageDigest
import scala.jdk.CollectionConverters._
@@ -36,10 +41,10 @@ import scala.jdk.CollectionConverters._
object S3StorageClient {
val MINIMUM_NUM_OF_MULTIPART_S3_PART: Long = 5L * 1024 * 1024 // 5 MiB
val MAXIMUM_NUM_OF_MULTIPART_S3_PARTS = 10_000
+ val credentials = AwsBasicCredentials.create(StorageConfig.s3Username, StorageConfig.s3Password)
// Initialize MinIO-compatible S3 Client
private lazy val s3Client: S3Client = {
- val credentials = AwsBasicCredentials.create(StorageConfig.s3Username, StorageConfig.s3Password)
S3Client
.builder()
.credentialsProvider(StaticCredentialsProvider.create(credentials))
@@ -51,6 +56,29 @@ object S3StorageClient {
.build()
}
+ // Initialize S3-compatible presigner for LakeFS S3 Gateway
+ private lazy val s3Presigner: S3Presigner = {
+ val fullUri = new URI(StorageConfig.lakefsEndpoint)
+ val baseUri = new URI(
+ fullUri.getScheme,
+ null,
+ fullUri.getHost,
+ fullUri.getPort,
+ null,
+ null,
+ null
+ ) // Extract just the base (scheme + host + port)
+ S3Presigner
+ .builder()
+ .credentialsProvider(StaticCredentialsProvider.create(credentials))
+ .region(Region.of(StorageConfig.s3Region))
+ .endpointOverride(baseUri) // LakeFS base URL ("http://localhost:8000" on local)
+ .serviceConfiguration(
+ S3Configuration.builder().pathStyleAccessEnabled(true).build()
+ )
+ .build()
+ }
+
/**
* Checks if a directory (prefix) exists within an S3 bucket.
*
@@ -139,4 +167,41 @@ object S3StorageClient {
s3Client.deleteObjects(deleteObjectsRequest)
}
}
+
+ /**
+ * Retrieves file content from a specific commit and path.
+ *
+ * @param repoName Repository name.
+ * @param commitHash Commit hash of the version.
+ * @param filePath Path to the file in the repository.
+ * @param fileName Name of the file downloaded via the presigned URL.
+ * @param contentType Type of the file downloaded via the presigned URL.
+ * @param expirationMinutes Duration in minutes that the presigned URL is valid.
+ */
+ def getFilePresignedUrl(
+ repoName: String,
+ commitHash: String,
+ filePath: String,
+ fileName: String,
+ contentType: String,
+ expirationMinutes: Long
+ ): String = {
+ val getObjectRequest = GetObjectRequest
+ .builder()
+ .bucket(repoName)
+ .key(s"$commitHash/$filePath")
+ .responseContentDisposition(s"attachment; filename='$fileName'")
+ .responseContentType(contentType)
+ .build()
+
+ val presignRequest = GetObjectPresignRequest
+ .builder()
+ .signatureDuration(Duration.ofMinutes(expirationMinutes))
+ .getObjectRequest(getObjectRequest)
+ .build()
+
+ val presignedUrl = s3Presigner.presignGetObject(presignRequest).url().toString
+ s3Presigner.close()
+ presignedUrl
+ }
}
diff --git a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts
index 9dd4de0c40..fb40c946bc 100644
--- a/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts
+++ b/core/gui/src/app/dashboard/component/user/user-dataset/user-dataset-explorer/dataset-detail.component.ts
@@ -269,8 +269,7 @@ export class DatasetDetailComponent implements OnInit {
onClickDownloadCurrentFile = (): void => {
if (!this.did || !this.selectedVersion?.dvid) return;
-
- this.downloadService.downloadSingleFile(this.currentDisplayedFileName).pipe(untilDestroyed(this)).subscribe();
+ this.downloadService.downloadSingleFile(this.currentDisplayedFileName);
};
onClickScaleTheView() {
diff --git a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts
index 226b1edb97..333be35fa3 100644
--- a/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts
+++ b/core/gui/src/app/dashboard/service/user/dataset/dataset.service.ts
@@ -93,6 +93,29 @@ export class DatasetService {
.pipe(switchMap(({ presignedUrl }) => this.http.get(presignedUrl, { responseType: "blob" })));
}
+ /**
+ * Retrieves a single file from a dataset version using a pre-signed URL.
+ * @param filePath Relative file path within the dataset.
+ * @param isLogin Determine whether a user is currently logged in
+ * @returns void File is downloaded natively by the browser.
+ */
+ public retrieveDatasetVersionSingleFileViaBrowser(filePath: string, isLogin: boolean = true): void {
+ const endpointSegment = isLogin ? "presign-download-s3" : "public-presign-download-s3";
+ const endpoint = `${AppSettings.getApiEndpoint()}/${DATASET_BASE_URL}/${endpointSegment}?filePath=${encodeURIComponent(filePath)}`;
+
+ this.http.get<{ presignedUrl: string }>(endpoint).subscribe({
+ next: response => {
+ const presignedUrl = response.presignedUrl;
+ const downloadUrl = document.createElement("a");
+
+ downloadUrl.href = presignedUrl;
+ document.body.appendChild(downloadUrl);
+ downloadUrl.click();
+ downloadUrl.remove();
+ },
+ });
+ }
+
/**
* Retrieves a zip file of a dataset version.
* @param did Dataset ID
diff --git a/core/gui/src/app/dashboard/service/user/download/download.service.spec.ts b/core/gui/src/app/dashboard/service/user/download/download.service.spec.ts
index f6d5a4b4b1..d1c55a434f 100644
--- a/core/gui/src/app/dashboard/service/user/download/download.service.spec.ts
+++ b/core/gui/src/app/dashboard/service/user/download/download.service.spec.ts
@@ -60,48 +60,6 @@ describe("DownloadService", () => {
notificationServiceSpy = TestBed.inject(NotificationService) as jasmine.SpyObj<NotificationService>;
});
- it("should download a single file successfully", (done: DoneFn) => {
- const filePath = "test/file.txt";
- const mockBlob = new Blob(["test content"], { type: "text/plain" });
-
- datasetServiceSpy.retrieveDatasetVersionSingleFile.and.returnValue(of(mockBlob));
-
- downloadService.downloadSingleFile(filePath).subscribe({
- next: blob => {
- expect(blob).toBe(mockBlob);
- expect(notificationServiceSpy.info).toHaveBeenCalledWith("Starting to download file test/file.txt");
- expect(datasetServiceSpy.retrieveDatasetVersionSingleFile).toHaveBeenCalledWith(filePath);
- expect(fileSaverServiceSpy.saveAs).toHaveBeenCalledWith(mockBlob, "file.txt");
- expect(notificationServiceSpy.success).toHaveBeenCalledWith("File test/file.txt has been downloaded");
- done();
- },
- error: (error: unknown) => {
- fail("Should not have thrown an error: " + error);
- },
- });
- });
-
- it("should handle download failure correctly", done => {
- const filePath = "test/file.txt";
- const errorMessage = "Download failed";
-
- datasetServiceSpy.retrieveDatasetVersionSingleFile.and.returnValue(throwError(() => new Error(errorMessage)));
-
- downloadService.downloadSingleFile(filePath).subscribe({
- next: () => {
- fail("Should have thrown an error");
- },
- error: (error: unknown) => {
- expect(error).toBeTruthy();
- expect(notificationServiceSpy.info).toHaveBeenCalledWith("Starting to download file test/file.txt");
- expect(datasetServiceSpy.retrieveDatasetVersionSingleFile).toHaveBeenCalledWith(filePath);
- expect(fileSaverServiceSpy.saveAs).not.toHaveBeenCalled();
- expect(notificationServiceSpy.error).toHaveBeenCalledWith("Error downloading file 'test/file.txt'");
- done();
- },
- });
- });
-
it("should download a dataset successfully", done => {
const datasetId = 1;
const datasetName = "TestDataset";
diff --git a/core/gui/src/app/dashboard/service/user/download/download.service.ts b/core/gui/src/app/dashboard/service/user/download/download.service.ts
index 813dea08e2..39b89f0f01 100644
--- a/core/gui/src/app/dashboard/service/user/download/download.service.ts
+++ b/core/gui/src/app/dashboard/service/user/download/download.service.ts
@@ -93,16 +93,9 @@ export class DownloadService {
);
}
- downloadSingleFile(filePath: string): Observable<Blob> {
- const DEFAULT_FILE_NAME = "download";
- const fileName = filePath.split("/").pop() || DEFAULT_FILE_NAME;
- return this.downloadWithNotification(
- () => this.datasetService.retrieveDatasetVersionSingleFile(filePath),
- fileName,
- `Starting to download file ${filePath}`,
- `File ${filePath} has been downloaded`,
- `Error downloading file '${filePath}'`
- );
+ downloadSingleFile(filePath: string): void {
+ this.notificationService.info(`Starting to download file ${filePath}`);
+ this.datasetService.retrieveDatasetVersionSingleFileViaBrowser(filePath);
}
downloadWorkflowsAsZip(workflowEntries: Array<{ id: number; name: string }>): Observable<Blob> {