This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 36b701388 [#4889] improvement(catalog-hadoop): Add the implementations
for `getFileLocation` interface in Catalog Hadoop (#4912)
36b701388 is described below
commit 36b701388d27428b13aadd6f6efe542261590a2e
Author: xloya <[email protected]>
AuthorDate: Thu Sep 12 01:21:44 2024 +0800
[#4889] improvement(catalog-hadoop): Add the implementations for
`getFileLocation` interface in Catalog Hadoop (#4912)
### What changes were proposed in this pull request?
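Implement the `getFileLocation` interface in the Hadoop catalog: `HadoopCatalogOperations` now resolves a sub-path against the fileset's storage location, and `SecureHadoopCatalogOperations` delegates to it instead of throwing `UnsupportedOperationException`.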
### Why are the changes needed?
Fix: #4889
### How was this patch tested?
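Added a unit test (`testGetFileLocation` in `TestHadoopCatalogOperations`) covering sub-paths with and without a leading "/", a null sub-path, and an empty sub-path.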
---------
Co-authored-by: xiaojiebao <[email protected]>
Co-authored-by: Jerry Shao <[email protected]>
---
.../catalog/hadoop/HadoopCatalogOperations.java | 25 +++++++++++++-
.../hadoop/SecureHadoopCatalogOperations.java | 2 +-
.../hadoop/TestHadoopCatalogOperations.java | 40 ++++++++++++++++++++++
3 files changed, 65 insertions(+), 2 deletions(-)
diff --git
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
index 0de7b27e1..194646491 100644
---
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
+++
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
@@ -69,6 +69,7 @@ public class HadoopCatalogOperations implements
CatalogOperations, SupportsSchem
private static final String SCHEMA_DOES_NOT_EXIST_MSG = "Schema %s does not
exist";
private static final String FILESET_DOES_NOT_EXIST_MSG = "Fileset %s does
not exist";
+ private static final String SLASH = "/";
private static final Logger LOG =
LoggerFactory.getLogger(HadoopCatalogOperations.class);
@@ -357,7 +358,29 @@ public class HadoopCatalogOperations implements
CatalogOperations, SupportsSchem
@Override
public String getFileLocation(NameIdentifier ident, String subPath)
throws NoSuchFilesetException {
- throw new UnsupportedOperationException("Not implemented");
+ // TODO: we need to move some of the check logic from the Hadoop / Python GVFS to here.
+ Preconditions.checkArgument(subPath != null, "subPath must not be null");
+ String processedSubPath;
+ if (!subPath.trim().isEmpty() && !subPath.trim().startsWith(SLASH)) {
+ processedSubPath = SLASH + subPath.trim();
+ } else {
+ processedSubPath = subPath.trim();
+ }
+
+ Fileset fileset = loadFileset(ident);
+
+ String fileLocation;
+ // subPath cannot be null, so we only need to check whether it is blank
+ if (StringUtils.isBlank(processedSubPath)) {
+ fileLocation = fileset.storageLocation();
+ } else {
+ String storageLocation =
+ fileset.storageLocation().endsWith(SLASH)
+ ? fileset.storageLocation().substring(0,
fileset.storageLocation().length() - 1)
+ : fileset.storageLocation();
+ fileLocation = String.format("%s%s", storageLocation, processedSubPath);
+ }
+ return fileLocation;
}
@Override
diff --git
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/SecureHadoopCatalogOperations.java
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/SecureHadoopCatalogOperations.java
index 284fd491d..79777f75d 100644
---
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/SecureHadoopCatalogOperations.java
+++
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/SecureHadoopCatalogOperations.java
@@ -221,7 +221,7 @@ public class SecureHadoopCatalogOperations
@Override
public String getFileLocation(NameIdentifier ident, String subPath)
throws NoSuchFilesetException {
- throw new UnsupportedOperationException("Not implemented");
+ return hadoopCatalogOperations.getFileLocation(ident, subPath);
}
@Override
diff --git
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
index 3b9ca0d7f..9f08ed8b6 100644
---
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
+++
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
@@ -762,6 +762,46 @@ public class TestHadoopCatalogOperations {
ImmutableMap.of()));
}
+ @Test
+ public void testGetFileLocation() throws IOException {
+ String schemaName = "schema1024";
+ String comment = "comment1024";
+ String schemaPath = TEST_ROOT_PATH + "/" + schemaName;
+ createSchema(schemaName, comment, null, schemaPath);
+
+ String catalogName = "c1";
+ String name = "fileset1024";
+ String storageLocation = TEST_ROOT_PATH + "/" + catalogName + "/" +
schemaName + "/" + name;
+ Fileset fileset =
+ createFileset(name, schemaName, comment, Fileset.Type.MANAGED, null,
storageLocation);
+
+ try (SecureHadoopCatalogOperations ops = new
SecureHadoopCatalogOperations(store)) {
+ ops.initialize(Maps.newHashMap(), randomCatalogInfo(),
HADOOP_PROPERTIES_METADATA);
+ NameIdentifier filesetIdent = NameIdentifier.of("m1", "c1", schemaName,
name);
+ // test sub path starts with "/"
+ String subPath1 = "/test/test.parquet";
+ String fileLocation1 = ops.getFileLocation(filesetIdent, subPath1);
+ Assertions.assertEquals(
+ String.format("%s%s", fileset.storageLocation(), subPath1),
fileLocation1);
+
+ // test sub path that does not start with "/"
+ String subPath2 = "test/test.parquet";
+ String fileLocation2 = ops.getFileLocation(filesetIdent, subPath2);
+ Assertions.assertEquals(
+ String.format("%s/%s", fileset.storageLocation(), subPath2),
fileLocation2);
+
+ // test sub path is null
+ String subPath3 = null;
+ Assertions.assertThrows(
+ IllegalArgumentException.class, () ->
ops.getFileLocation(filesetIdent, subPath3));
+
+ // test sub path is blank but not null
+ String subPath4 = "";
+ String fileLocation3 = ops.getFileLocation(filesetIdent, subPath4);
+ Assertions.assertEquals(fileset.storageLocation(), fileLocation3);
+ }
+ }
+
private static Stream<Arguments> locationArguments() {
return Stream.of(
// Honor the catalog location