This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new b2a930225 [#4165] improvement(Filesystem): Improve the potential
storage replication issues in Hadoop GVFS (#4166)
b2a930225 is described below
commit b2a93022553a258a8f599f2e5a6e0bbdc3e5ace7
Author: xloya <[email protected]>
AuthorDate: Tue Jul 16 17:03:26 2024 +0800
[#4165] improvement(Filesystem): Improve the potential storage replication
issues in Hadoop GVFS (#4166)
### What changes were proposed in this pull request?
Currently, Hadoop GVFS does not implement the `getDefaultBlockSize(Path
f)` and `getBlockSize(Path f)` methods, which will result in the use of
the FileSystem default values, causing the storage replications and
block sizes to not meet expectations.
### Why are the changes needed?
Fix: #4165
### How was this patch tested?
Add UTs and ITs.
---------
Co-authored-by: xiaojiebao <[email protected]>
---
.../hadoop/GravitinoVirtualFileSystem.java | 12 +++++++
.../gravitino/filesystem/hadoop/TestGvfsBase.java | 16 +++++++++
.../hadoop/GravitinoVirtualFileSystemIT.java | 42 ++++++++++++++++++++++
3 files changed, 70 insertions(+)
diff --git
a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
index ef51c1753..bbcf0c71e 100644
---
a/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
+++
b/clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java
@@ -515,6 +515,18 @@ public class GravitinoVirtualFileSystem extends FileSystem
{
return context.getFileSystem().mkdirs(context.getActualPath(), permission);
}
+ @Override
+ public short getDefaultReplication(Path f) {
+ FilesetContext context = getFilesetContext(f);
+ return context.getFileSystem().getDefaultReplication(context.getActualPath());
+ }
+
+ @Override
+ public long getDefaultBlockSize(Path f) {
+ FilesetContext context = getFilesetContext(f);
+ return context.getFileSystem().getDefaultBlockSize(context.getActualPath());
+ }
+
@Override
public synchronized void close() throws IOException {
// close all actual FileSystems
diff --git
a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java
index ce87d8d02..13b365a25 100644
---
a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java
+++
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/TestGvfsBase.java
@@ -591,4 +591,20 @@ public class TestGvfsBase extends GravitinoMockServerBase {
() -> fs.extractIdentifier(new URI("/catalog1/schema1/fileset1/dir//")));
}
}
+
+ @Test
+ public void testGetDefaultReplications() throws IOException {
+ try (GravitinoVirtualFileSystem fs =
+ (GravitinoVirtualFileSystem) managedFilesetPath.getFileSystem(conf)) {
+ assertEquals(1, fs.getDefaultReplication(managedFilesetPath));
+ }
+ }
+
+ @Test
+ public void testGetDefaultBlockSize() throws IOException {
+ try (GravitinoVirtualFileSystem fs =
+ (GravitinoVirtualFileSystem) managedFilesetPath.getFileSystem(conf)) {
+ assertEquals(32 * 1024 * 1024, fs.getDefaultBlockSize(managedFilesetPath));
+ }
+ }
}
diff --git
a/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java
b/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java
index 9321080d9..feb8446be 100644
---
a/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java
+++
b/integration-test/src/test/java/org/apache/gravitino/integration/test/client/filesystem/hadoop/GravitinoVirtualFileSystemIT.java
@@ -422,6 +422,48 @@ public class GravitinoVirtualFileSystemIT extends
AbstractIT {
}
}
+ @Test
+ public void testGetDefaultReplications() throws IOException {
+ String filesetName = "test_get_default_replications";
+ NameIdentifier filesetIdent = NameIdentifier.of(schemaName, filesetName);
+ Catalog catalog = metalake.loadCatalog(catalogName);
+ String storageLocation = genStorageLocation(filesetName);
+ catalog
+ .asFilesetCatalog()
+ .createFileset(
+ filesetIdent,
+ "fileset comment",
+ Fileset.Type.MANAGED,
+ storageLocation,
+ new HashMap<>());
+
+ Assertions.assertTrue(catalog.asFilesetCatalog().filesetExists(filesetIdent));
+ Path gvfsPath = genGvfsPath(filesetName);
+ try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) {
+ assertEquals(3, gvfs.getDefaultReplication(gvfsPath));
+ }
+ }
+
+ @Test
+ public void testGetDefaultBlockSizes() throws IOException {
+ String filesetName = "test_get_default_block_sizes";
+ NameIdentifier filesetIdent = NameIdentifier.of(schemaName, filesetName);
+ Catalog catalog = metalake.loadCatalog(catalogName);
+ String storageLocation = genStorageLocation(filesetName);
+ catalog
+ .asFilesetCatalog()
+ .createFileset(
+ filesetIdent,
+ "fileset comment",
+ Fileset.Type.MANAGED,
+ storageLocation,
+ new HashMap<>());
+
+ Assertions.assertTrue(catalog.asFilesetCatalog().filesetExists(filesetIdent));
+ Path gvfsPath = genGvfsPath(filesetName);
+ try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) {
+ assertEquals(128 * 1024 * 1024, gvfs.getDefaultBlockSize(gvfsPath));
+ }
+ }
+
private String genStorageLocation(String fileset) {
return String.format("%s/%s", baseHdfsPath(), fileset);
}