This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/main by this push:
new 076bc77ffa [#6940] feat(fileset): support disable Hadoop catalog auto
creation of directories (#6973)
076bc77ffa is described below
commit 076bc77ffa0900cb2c0b417fbf75363bdd61573b
Author: mchades <[email protected]>
AuthorDate: Thu Apr 17 16:25:03 2025 +0800
[#6940] feat(fileset): support disable Hadoop catalog auto creation of
directories (#6973)
### What changes were proposed in this pull request?
- support disabling Hadoop catalog auto creation of directories
- when the FS ops are disabled on the server side, the GVFS will
automatically create the dir
### Why are the changes needed?
Fix: #6940
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
tests added
Co-authored-by: Jerry Shao <[email protected]>
---
.../catalog/hadoop/HadoopCatalogOperations.java | 153 +++++++++++--------
.../hadoop/HadoopCatalogPropertiesMetadata.java | 19 +++
.../catalog-hadoop/src/main/resources/hadoop.conf | 4 +
.../hadoop/TestHadoopCatalogOperations.java | 162 ++++++++++++++++++++-
.../test/GravitinoVirtualFileSystemIT.java | 37 +++--
...vitinoVirtualFilesystemWithFSOpsDisabledIT.java | 33 +++++
docs/hadoop-catalog.md | 3 +-
7 files changed, 335 insertions(+), 76 deletions(-)
diff --git
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
index 74f7a0ebe9..b61000306f 100644
---
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
+++
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
@@ -117,6 +117,8 @@ public class HadoopCatalogOperations extends
ManagedSchemaOperations
private FileSystemProvider defaultFileSystemProvider;
+ private boolean disableFSOps;
+
HadoopCatalogOperations(EntityStore store) {
this.store = store;
}
@@ -150,34 +152,33 @@ public class HadoopCatalogOperations extends
ManagedSchemaOperations
throws RuntimeException {
this.propertiesMetadata = propertiesMetadata;
this.catalogInfo = info;
-
this.conf = config;
- String fileSystemProviders =
- (String)
+ this.disableFSOps =
+ (boolean)
propertiesMetadata
.catalogPropertiesMetadata()
- .getOrDefault(config,
HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS);
- this.fileSystemProvidersMap =
- ImmutableMap.<String, FileSystemProvider>builder()
-
.putAll(FileSystemUtils.getFileSystemProviders(fileSystemProviders))
- .build();
-
- String defaultFileSystemProviderName =
- (String)
- propertiesMetadata
- .catalogPropertiesMetadata()
- .getOrDefault(config,
HadoopCatalogPropertiesMetadata.DEFAULT_FS_PROVIDER);
- this.defaultFileSystemProvider =
- FileSystemUtils.getFileSystemProviderByName(
- fileSystemProvidersMap, defaultFileSystemProviderName);
-
- String catalogLocation =
- (String)
- propertiesMetadata
- .catalogPropertiesMetadata()
- .getOrDefault(config,
HadoopCatalogPropertiesMetadata.LOCATION);
- checkPlaceholderValue(catalogLocation);
+ .getOrDefault(config,
HadoopCatalogPropertiesMetadata.DISABLE_FILESYSTEM_OPS);
+ if (!disableFSOps) {
+ String fileSystemProviders =
+ (String)
+ propertiesMetadata
+ .catalogPropertiesMetadata()
+ .getOrDefault(config,
HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDERS);
+ this.fileSystemProvidersMap =
+ ImmutableMap.<String, FileSystemProvider>builder()
+
.putAll(FileSystemUtils.getFileSystemProviders(fileSystemProviders))
+ .build();
+
+ String defaultFileSystemProviderName =
+ (String)
+ propertiesMetadata
+ .catalogPropertiesMetadata()
+ .getOrDefault(config,
HadoopCatalogPropertiesMetadata.DEFAULT_FS_PROVIDER);
+ this.defaultFileSystemProvider =
+ FileSystemUtils.getFileSystemProviderByName(
+ fileSystemProvidersMap, defaultFileSystemProviderName);
+ }
this.catalogStorageLocations = getAndCheckCatalogStorageLocations(config);
}
@@ -291,40 +292,59 @@ public class HadoopCatalogOperations extends
ManagedSchemaOperations
properties = setDefaultLocationIfAbsent(properties, filesetPaths);
ImmutableMap.Builder<String, Path> filesetPathsBuilder =
ImmutableMap.builder();
- try {
- // formalize the path to avoid path without scheme, uri, authority, etc.
- for (Map.Entry<String, Path> entry : filesetPaths.entrySet()) {
- Path formalizePath = formalizePath(entry.getValue(), conf);
- filesetPathsBuilder.put(entry.getKey(), formalizePath);
-
- FileSystem fs = getFileSystem(formalizePath, conf);
- if (!fs.exists(formalizePath)) {
- if (!fs.mkdirs(formalizePath)) {
- throw new RuntimeException(
- "Failed to create fileset "
- + ident
- + " location "
- + formalizePath
- + " with location name "
- + entry.getKey());
- }
+ if (disableFSOps) {
+ filesetPaths.forEach(
+ (locationName, location) -> {
+ // If the location does not have scheme and filesystem operations
are disabled in the
+ // server side, we cannot formalize the path by filesystem,
neither can we do in the
+ // client side, so we should throw an exception here.
+ if (location.toUri().getScheme() == null) {
+ throw new IllegalArgumentException(
+ "Storage location must have scheme for fileset if filesystem
operations are "
+ + "disabled in the server side, location: "
+ + location
+ + ", location name: "
+ + locationName);
+ }
- LOG.info(
- "Created fileset {} location {} with location name {}",
- ident,
- formalizePath,
- entry.getKey());
- } else {
- LOG.info(
- "Fileset {} manages the existing location {} with location name
{}",
- ident,
- formalizePath,
- entry.getKey());
+ filesetPathsBuilder.put(locationName, location);
+ });
+ } else {
+ try {
+ // formalize the path to avoid path without scheme, uri, authority,
etc.
+ for (Map.Entry<String, Path> entry : filesetPaths.entrySet()) {
+ Path formalizePath = formalizePath(entry.getValue(), conf);
+ filesetPathsBuilder.put(entry.getKey(), formalizePath);
+
+ FileSystem fs = getFileSystem(formalizePath, conf);
+ if (!fs.exists(formalizePath)) {
+ if (!fs.mkdirs(formalizePath)) {
+ throw new RuntimeException(
+ "Failed to create fileset "
+ + ident
+ + " location "
+ + formalizePath
+ + " with location name "
+ + entry.getKey());
+ }
+
+ LOG.info(
+ "Created fileset {} location {} with location name {}",
+ ident,
+ formalizePath,
+ entry.getKey());
+ } else {
+ LOG.info(
+ "Fileset {} manages the existing location {} with location
name {}",
+ ident,
+ formalizePath,
+ entry.getKey());
+ }
}
- }
- } catch (IOException ioe) {
- throw new RuntimeException("Failed to create fileset " + ident, ioe);
+ } catch (IOException ioe) {
+ throw new RuntimeException("Failed to create fileset " + ident, ioe);
+ }
}
Map<String, String> formattedStorageLocations =
@@ -456,7 +476,7 @@ public class HadoopCatalogOperations extends
ManagedSchemaOperations
store.get(ident, Entity.EntityType.FILESET, FilesetEntity.class);
// For managed fileset, we should delete the related files.
- if (filesetEntity.filesetType() == Fileset.Type.MANAGED) {
+ if (!disableFSOps && filesetEntity.filesetType() ==
Fileset.Type.MANAGED) {
AtomicReference<IOException> exception = new AtomicReference<>();
Map<String, Path> storageLocations =
Maps.transformValues(filesetEntity.storageLocations(), Path::new);
@@ -524,7 +544,18 @@ public class HadoopCatalogOperations extends
ManagedSchemaOperations
"Location name %s does not exist in fileset %s", locationName,
ident);
}
- boolean isSingleFile = checkSingleFile(fileset, locationName);
+ boolean isSingleFile = false;
+ if (disableFSOps) {
+ LOG.warn(
+ "Filesystem operations are disabled in the server side, we cannot
check if the "
+ + "storage location mounts to a directory or single file, we
assume it is a directory"
+ + "(in most of the cases). If it happens to be a single file,
then the generated "
+ + "file location may be a wrong path. Please avoid using Fileset
to manage a single"
+ + " file path.");
+ } else {
+ isSingleFile = checkSingleFile(fileset, locationName);
+ }
+
// if the storage location is a single file, it cannot have sub path to
access.
if (isSingleFile && StringUtils.isNotBlank(processedSubPath)) {
throw new GravitinoRuntimeException(
@@ -583,6 +614,10 @@ public class HadoopCatalogOperations extends
ManagedSchemaOperations
@Override
public Schema createSchema(NameIdentifier ident, String comment, Map<String,
String> properties)
throws NoSuchCatalogException, SchemaAlreadyExistsException {
+ if (disableFSOps) {
+ return super.createSchema(ident, comment, properties);
+ }
+
try {
if (store.exists(ident, Entity.EntityType.SCHEMA)) {
throw new SchemaAlreadyExistsException("Schema %s already exists",
ident);
@@ -667,6 +702,10 @@ public class HadoopCatalogOperations extends
ManagedSchemaOperations
Map<String, Path> schemaPaths = getAndCheckSchemaPaths(ident.name(),
properties);
boolean dropped = super.dropSchema(ident, cascade);
+ if (disableFSOps) {
+ return dropped;
+ }
+
// If the schema entity is failed to be deleted, we should not delete
the storage location
// and return false immediately.
if (!dropped) {
diff --git
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
index f8dc025558..2aadea59ae 100644
---
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
+++
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
@@ -59,6 +59,15 @@ public class HadoopCatalogPropertiesMetadata extends
BaseCatalogPropertiesMetada
static final String FILESYSTEM_CONNECTION_TIMEOUT_SECONDS =
"filesystem-conn-timeout-secs";
static final int DEFAULT_GET_FILESYSTEM_TIMEOUT_SECONDS = 6;
+ /**
+ * The property to disable file system operations like list, exists, mkdir
operations in the
+ * server side, so that the server side catalog can be used as a metadata
only catalog, no need to
+ * configure the file system access related configurations. By default, it
is false.
+ */
+ static final String DISABLE_FILESYSTEM_OPS = "disable-filesystem-ops";
+
+ static final boolean DEFAULT_DISABLE_FILESYSTEM_OPS = false;
+
private static final Map<String, PropertyEntry<?>>
HADOOP_CATALOG_PROPERTY_ENTRIES =
ImmutableMap.<String, PropertyEntry<?>>builder()
.put(
@@ -108,6 +117,16 @@ public class HadoopCatalogPropertiesMetadata extends
BaseCatalogPropertiesMetada
false /* immutable */,
DEFAULT_GET_FILESYSTEM_TIMEOUT_SECONDS,
false /* hidden */))
+ .put(
+ DISABLE_FILESYSTEM_OPS,
+ PropertyEntry.booleanPropertyEntry(
+ DISABLE_FILESYSTEM_OPS,
+ "Disable file system operations in the server side",
+ false /* required */,
+ true /* immutable */,
+ DEFAULT_DISABLE_FILESYSTEM_OPS,
+ false /* hidden */,
+ false /* reserved */))
// The following two are about authentication.
.putAll(KERBEROS_PROPERTY_ENTRIES)
.putAll(AuthenticationConfig.AUTHENTICATION_PROPERTY_ENTRIES)
diff --git a/catalogs/catalog-hadoop/src/main/resources/hadoop.conf
b/catalogs/catalog-hadoop/src/main/resources/hadoop.conf
index 605c9b43a2..bd91a81e97 100644
--- a/catalogs/catalog-hadoop/src/main/resources/hadoop.conf
+++ b/catalogs/catalog-hadoop/src/main/resources/hadoop.conf
@@ -28,3 +28,7 @@
# the configuration will be directly by pass to backend engine, and
'fs.defaultFS' is the configuration key.
# gravitino.bypass.fs.defaultFS = hdfs://localhost:9000
+
+# If we want to disable the file system operations in the server side, like
mkdir, rm, etc, we can
+# set the 'disable-filesystem-ops' to true. By default, it is false.
+# disable-filesystem-ops = false
diff --git
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
index 97c01704ed..25c523ea70 100644
---
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
+++
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/TestHadoopCatalogOperations.java
@@ -35,6 +35,7 @@ import static
org.apache.gravitino.Configs.VERSION_RETENTION_COUNT;
import static
org.apache.gravitino.catalog.hadoop.HadoopCatalog.CATALOG_PROPERTIES_META;
import static
org.apache.gravitino.catalog.hadoop.HadoopCatalog.FILESET_PROPERTIES_META;
import static
org.apache.gravitino.catalog.hadoop.HadoopCatalog.SCHEMA_PROPERTIES_META;
+import static
org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.DISABLE_FILESYSTEM_OPS;
import static
org.apache.gravitino.catalog.hadoop.HadoopCatalogPropertiesMetadata.LOCATION;
import static org.apache.gravitino.file.Fileset.LOCATION_NAME_UNKNOWN;
import static org.apache.gravitino.file.Fileset.PROPERTY_DEFAULT_LOCATION_NAME;
@@ -219,8 +220,8 @@ public class TestHadoopCatalogOperations {
// Mock
MetalakeMetaService metalakeMetaService =
MetalakeMetaService.getInstance();
- MetalakeMetaService spyMetaservice = Mockito.spy(metalakeMetaService);
- doReturn(1L).when(spyMetaservice).getMetalakeIdByName(Mockito.anyString());
+ MetalakeMetaService spyMetaService = Mockito.spy(metalakeMetaService);
+ doReturn(1L).when(spyMetaService).getMetalakeIdByName(Mockito.anyString());
CatalogMetaService catalogMetaService = CatalogMetaService.getInstance();
CatalogMetaService spyCatalogMetaService = Mockito.spy(catalogMetaService);
@@ -305,7 +306,7 @@ public class TestHadoopCatalogOperations {
metalakeMetaServiceMockedStatic
.when(MetalakeMetaService::getInstance)
- .thenReturn(spyMetaservice);
+ .thenReturn(spyMetaService);
catalogMetaServiceMockedStatic
.when(CatalogMetaService::getInstance)
.thenReturn(spyCatalogMetaService);
@@ -415,6 +416,16 @@ public class TestHadoopCatalogOperations {
schemaPath = new Path(catalogPath, name);
fs = schemaPath.getFileSystem(new Configuration());
Assertions.assertFalse(fs.exists(schemaPath));
+
+ // Test disable server-side FS operations.
+ name = "schema12_2";
+ catalogPath = TEST_ROOT_PATH + "/" + "catalog12_2";
+ schema = createSchema(name, comment, catalogPath, null, true);
+ Assertions.assertEquals(name, schema.name());
+
+ // Schema path should not exist if the server-side FS operations are
disabled.
+ schemaPath = new Path(catalogPath, name);
+ Assertions.assertFalse(fs.exists(schemaPath));
}
@Test
@@ -452,6 +463,15 @@ public class TestHadoopCatalogOperations {
Assertions.assertTrue(
exception.getMessage().contains("Placeholder in location should not be
empty"),
exception.getMessage());
+
+ // Test disable server-side FS operations.
+ name = "schema13_3";
+ schemaPath = catalogPath + "/" + name;
+ schema = createSchema(name, comment, null, schemaPath, true);
+ Assertions.assertEquals(name, schema.name());
+
+ // Schema path should not exist if the server-side FS operations are
disabled.
+ Assertions.assertFalse(fs.exists(new Path(schemaPath)));
}
@Test
@@ -484,6 +504,17 @@ public class TestHadoopCatalogOperations {
Assertions.assertFalse(fs.exists(schemaPath1));
Assertions.assertFalse(fs.exists(new Path(catalogPath)));
Assertions.assertFalse(fs.exists(new Path(catalogPath, name)));
+
+ // Test disable server-side FS operations.
+ name = "schema14_2";
+ catalogPath = TEST_ROOT_PATH + "/" + "catalog14_2";
+ schemaPath = TEST_ROOT_PATH + "/" + "schema14_2";
+ schema = createSchema(name, comment, catalogPath, schemaPath, true);
+ Assertions.assertEquals(name, schema.name());
+
+ // Schema path should not exist if the server-side FS operations are
disabled.
+ Assertions.assertFalse(fs.exists(new Path(catalogPath)));
+ Assertions.assertFalse(fs.exists(new Path(schemaPath)));
}
@Test
@@ -640,6 +671,38 @@ public class TestHadoopCatalogOperations {
}
}
+ @Test
+ public void testDropSchemaWithFSOpsDisabled() throws IOException {
+ String name = "schema21";
+ String comment = "comment21";
+ String catalogPath = TEST_ROOT_PATH + "/" + "catalog20";
+ Schema schema = createSchema(name, comment, catalogPath, null);
+ Assertions.assertEquals(name, schema.name());
+ NameIdentifier id = NameIdentifierUtil.ofSchema("m1", "c1", name);
+
+ try (SecureHadoopCatalogOperations ops = new
SecureHadoopCatalogOperations(store)) {
+ ops.initialize(
+ ImmutableMap.of(LOCATION, catalogPath, DISABLE_FILESYSTEM_OPS,
"true"),
+ randomCatalogInfo("m1", "c1"),
+ HADOOP_PROPERTIES_METADATA);
+
+ ops.dropSchema(id, false);
+
+ Path schemaPath = new Path(new Path(catalogPath), name);
+ FileSystem fs = schemaPath.getFileSystem(new Configuration());
+ Assertions.assertTrue(fs.exists(schemaPath));
+
+ createSchema(name, comment, catalogPath, null);
+ Fileset fs1 = createFileset("fs1", name, "comment",
Fileset.Type.MANAGED, catalogPath, null);
+ Path fs1Path = new Path(fs1.storageLocation());
+
+ // Test drop non-empty schema with cascade = true
+ ops.dropSchema(id, true);
+ Assertions.assertTrue(fs.exists(schemaPath));
+ Assertions.assertTrue(fs.exists(fs1Path));
+ }
+ }
+
@ParameterizedTest
@MethodSource("locationArguments")
public void testCreateLoadAndDeleteFilesetWithLocations(
@@ -696,6 +759,79 @@ public class TestHadoopCatalogOperations {
}
}
+ @ParameterizedTest
+ @MethodSource("locationArguments")
+ public void testCreateLoadAndDeleteFilesetWithLocationsWhenFSOpsDisabled(
+ String name,
+ Fileset.Type type,
+ String catalogPath,
+ String schemaPath,
+ String storageLocation,
+ String expect)
+ throws IOException {
+ String schemaName = "s1_" + name;
+ String comment = "comment_s1";
+ Map<String, String> catalogProps = Maps.newHashMap();
+ catalogProps.put(DISABLE_FILESYSTEM_OPS, "true");
+ if (catalogPath != null) {
+ catalogProps.put(LOCATION, catalogPath);
+ }
+
+ NameIdentifier schemaIdent = NameIdentifierUtil.ofSchema("m1", "c1",
schemaName);
+ try (SecureHadoopCatalogOperations ops = new
SecureHadoopCatalogOperations(store)) {
+ ops.initialize(catalogProps, randomCatalogInfo("m1", "c1"),
HADOOP_PROPERTIES_METADATA);
+ if (!ops.schemaExists(schemaIdent)) {
+ createSchema(schemaName, comment, catalogPath, schemaPath, true);
+ }
+
+ Fileset fileset;
+ try {
+ fileset =
+ createFileset(name, schemaName, "comment", type, catalogPath,
storageLocation, true);
+ } catch (Exception e) {
+ String locationPath =
+ storageLocation != null
+ ? storageLocation
+ : schemaPath != null ? schemaPath : catalogPath;
+ if (new Path(locationPath).toUri().getScheme() == null) {
+ Assertions.assertInstanceOf(IllegalArgumentException.class, e);
+ return;
+ } else {
+ throw e;
+ }
+ }
+
+ Assertions.assertEquals(name, fileset.name());
+ Assertions.assertEquals(type, fileset.type());
+ Assertions.assertEquals("comment", fileset.comment());
+ Assertions.assertEquals(expect, fileset.storageLocation());
+
+ // Fileset storage location should not exist.
+ Path storagePath = new Path(fileset.storageLocation());
+ FileSystem fs = storagePath.getFileSystem(new Configuration());
+ Assertions.assertFalse(fs.exists(storagePath));
+
+ // Test load
+ NameIdentifier filesetIdent = NameIdentifier.of("m1", "c1", schemaName,
name);
+ Fileset loadedFileset = ops.loadFileset(filesetIdent);
+ Assertions.assertEquals(name, loadedFileset.name());
+ Assertions.assertEquals(type, loadedFileset.type());
+ Assertions.assertEquals("comment", loadedFileset.comment());
+ Assertions.assertEquals(expect, loadedFileset.storageLocation());
+
+ // Test drop
+ ops.dropFileset(filesetIdent);
+ Path expectedPath = new Path(expect);
+ Assertions.assertFalse(fs.exists(expectedPath));
+
+ // clean expected path if exist
+ fs.delete(expectedPath, true);
+
+ // Test drop non-existent fileset
+ Assertions.assertFalse(ops.dropFileset(filesetIdent), "fileset should be
non-existent");
+ }
+ }
+
@Test
public void testCreateFilesetWithExceptions() throws IOException {
String schemaName = "schema22";
@@ -2378,7 +2514,14 @@ public class TestHadoopCatalogOperations {
private Schema createSchema(String name, String comment, String catalogPath,
String schemaPath)
throws IOException {
+ return createSchema(name, comment, catalogPath, schemaPath, false);
+ }
+
+ private Schema createSchema(
+ String name, String comment, String catalogPath, String schemaPath,
boolean disableFsOps)
+ throws IOException {
Map<String, String> props = Maps.newHashMap();
+ props.put(DISABLE_FILESYSTEM_OPS, String.valueOf(disableFsOps));
if (catalogPath != null) {
props.put(LOCATION, catalogPath);
}
@@ -2448,7 +2591,20 @@ public class TestHadoopCatalogOperations {
String catalogPath,
String storageLocation)
throws IOException {
+ return createFileset(name, schemaName, comment, type, catalogPath,
storageLocation, false);
+ }
+
+ private Fileset createFileset(
+ String name,
+ String schemaName,
+ String comment,
+ Fileset.Type type,
+ String catalogPath,
+ String storageLocation,
+ boolean disableFsOps)
+ throws IOException {
Map<String, String> props = Maps.newHashMap();
+ props.put(DISABLE_FILESYSTEM_OPS, String.valueOf(disableFsOps));
if (catalogPath != null) {
props.put(LOCATION, catalogPath);
}
diff --git
a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemIT.java
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemIT.java
index 8de5267f19..0b5c5995e1 100644
---
a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemIT.java
+++
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFileSystemIT.java
@@ -68,6 +68,7 @@ public class GravitinoVirtualFileSystemIT extends BaseIT {
protected Configuration conf = new Configuration();
protected int defaultBlockSize = 128 * 1024 * 1024;
protected int defaultReplication = 3;
+ protected Map<String, String> properties = Maps.newHashMap();
@BeforeAll
public void startUp() throws Exception {
@@ -76,7 +77,6 @@ public class GravitinoVirtualFileSystemIT extends BaseIT {
metalake = client.createMetalake(metalakeName, "metalake comment",
Collections.emptyMap());
Assertions.assertTrue(client.metalakeExists(metalakeName));
- Map<String, String> properties = Maps.newHashMap();
Catalog catalog =
metalake.createCatalog(
catalogName, Catalog.Type.FILESET, "hadoop", "catalog comment",
properties);
@@ -189,10 +189,11 @@ public class GravitinoVirtualFileSystemIT extends BaseIT {
// test gvfs create
Path hdfsPath = new Path(storageLocation);
try (FileSystem fs =
hdfsPath.getFileSystem(convertGvfsConfigToRealFileSystemConfig(conf))) {
- Assertions.assertTrue(fs.exists(hdfsPath));
Path gvfsPath = genGvfsPath(filesetName);
try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) {
- Assertions.assertTrue(gvfs.exists(gvfsPath));
+ if (!gvfs.exists(gvfsPath)) {
+ gvfs.mkdirs(gvfsPath);
+ }
String fileName = "test.txt";
Path createPath = new Path(gvfsPath + "/" + fileName);
// GCS need to close the stream to create the file manually.
@@ -226,13 +227,14 @@ public class GravitinoVirtualFileSystemIT extends BaseIT {
// test gvfs append
Path hdfsPath = new Path(storageLocation);
try (FileSystem fs =
hdfsPath.getFileSystem(convertGvfsConfigToRealFileSystemConfig(conf))) {
- Assertions.assertTrue(fs.exists(hdfsPath));
Path gvfsPath = genGvfsPath(filesetName);
String fileName = "test.txt";
Path appendPath = new Path(gvfsPath + "/" + fileName);
try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) {
- Assertions.assertTrue(gvfs.exists(gvfsPath));
+ if (!gvfs.exists(gvfsPath)) {
+ gvfs.mkdirs(gvfsPath);
+ }
gvfs.create(appendPath).close();
Assertions.assertTrue(gvfs.exists(appendPath));
Assertions.assertTrue(gvfs.getFileStatus(appendPath).isFile());
@@ -287,12 +289,13 @@ public class GravitinoVirtualFileSystemIT extends BaseIT {
// test gvfs delete
Path hdfsPath = new Path(storageLocation);
try (FileSystem fs =
hdfsPath.getFileSystem(convertGvfsConfigToRealFileSystemConfig(conf))) {
- Assertions.assertTrue(fs.exists(hdfsPath));
Path gvfsPath = genGvfsPath(filesetName);
String fileName = "test.txt";
Path deletePath = new Path(gvfsPath + "/" + fileName);
try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) {
- Assertions.assertTrue(gvfs.exists(gvfsPath));
+ if (!gvfs.exists(gvfsPath)) {
+ gvfs.mkdirs(gvfsPath);
+ }
gvfs.create(deletePath).close();
Assertions.assertTrue(gvfs.exists(deletePath));
Assertions.assertTrue(gvfs.getFileStatus(deletePath).isFile());
@@ -328,12 +331,13 @@ public class GravitinoVirtualFileSystemIT extends BaseIT {
// test gvfs get status
Path hdfsPath = new Path(storageLocation);
try (FileSystem fs =
hdfsPath.getFileSystem(convertGvfsConfigToRealFileSystemConfig(conf))) {
- Assertions.assertTrue(fs.exists(hdfsPath));
Path gvfsPath = genGvfsPath(filesetName);
String fileName = "test.txt";
Path statusPath = new Path(gvfsPath + "/" + fileName);
try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) {
- Assertions.assertTrue(gvfs.exists(gvfsPath));
+ if (!gvfs.exists(gvfsPath)) {
+ gvfs.mkdirs(gvfsPath);
+ }
gvfs.create(statusPath).close();
Assertions.assertTrue(gvfs.exists(statusPath));
Assertions.assertTrue(gvfs.getFileStatus(statusPath).isFile());
@@ -372,13 +376,14 @@ public class GravitinoVirtualFileSystemIT extends BaseIT {
// test gvfs list status
Path hdfsPath = new Path(storageLocation);
try (FileSystem fs =
hdfsPath.getFileSystem(convertGvfsConfigToRealFileSystemConfig(conf))) {
- Assertions.assertTrue(fs.exists(hdfsPath));
Path gvfsPath = genGvfsPath(filesetName);
for (int i = 0; i < 10; i++) {
String fileName = "test_" + i + ".txt";
Path statusPath = new Path(gvfsPath + "/" + fileName);
try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) {
- Assertions.assertTrue(gvfs.exists(gvfsPath));
+ if (!gvfs.exists(gvfsPath)) {
+ gvfs.mkdirs(gvfsPath);
+ }
gvfs.create(statusPath).close();
Assertions.assertTrue(gvfs.exists(statusPath));
Assertions.assertTrue(gvfs.getFileStatus(statusPath).isFile());
@@ -430,10 +435,11 @@ public class GravitinoVirtualFileSystemIT extends BaseIT {
// test gvfs mkdirs
Path hdfsPath = new Path(storageLocation);
try (FileSystem fs =
hdfsPath.getFileSystem(convertGvfsConfigToRealFileSystemConfig(conf))) {
- Assertions.assertTrue(fs.exists(hdfsPath));
Path gvfsPath = genGvfsPath(filesetName);
try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) {
- Assertions.assertTrue(gvfs.exists(gvfsPath));
+ if (!gvfs.exists(gvfsPath)) {
+ gvfs.mkdirs(gvfsPath);
+ }
String dirName = "test";
Path dirPath = new Path(gvfsPath + "/" + dirName);
gvfs.mkdirs(dirPath);
@@ -466,13 +472,14 @@ public class GravitinoVirtualFileSystemIT extends BaseIT {
// test gvfs rename
Path hdfsPath = new Path(storageLocation);
try (FileSystem fs =
hdfsPath.getFileSystem(convertGvfsConfigToRealFileSystemConfig(conf))) {
- Assertions.assertTrue(fs.exists(hdfsPath));
Path gvfsPath = genGvfsPath(filesetName);
String srcName = "test_src";
Path srcPath = new Path(gvfsPath + "/" + srcName);
try (FileSystem gvfs = gvfsPath.getFileSystem(conf)) {
- Assertions.assertTrue(gvfs.exists(gvfsPath));
+ if (!gvfs.exists(gvfsPath)) {
+ gvfs.mkdirs(gvfsPath);
+ }
gvfs.mkdirs(srcPath);
Assertions.assertTrue(gvfs.exists(srcPath));
Assertions.assertTrue(gvfs.getFileStatus(srcPath).isDirectory());
diff --git
a/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFilesystemWithFSOpsDisabledIT.java
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFilesystemWithFSOpsDisabledIT.java
new file mode 100644
index 0000000000..2564bac84f
--- /dev/null
+++
b/clients/filesystem-hadoop3/src/test/java/org/apache/gravitino/filesystem/hadoop/integration/test/GravitinoVirtualFilesystemWithFSOpsDisabledIT.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.filesystem.hadoop.integration.test;
+
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Tag;
+
+@Tag("gravitino-docker-test")
+public class GravitinoVirtualFilesystemWithFSOpsDisabledIT extends
GravitinoVirtualFileSystemIT {
+
+ @BeforeAll
+ @Override
+ public void startUp() throws Exception {
+ properties.put("disable-filesystem-ops", "true");
+ super.startUp();
+ }
+}
diff --git a/docs/hadoop-catalog.md b/docs/hadoop-catalog.md
index 1c255484e5..c187a80f3b 100644
--- a/docs/hadoop-catalog.md
+++ b/docs/hadoop-catalog.md
@@ -27,9 +27,10 @@ Besides the [common catalog
properties](./gravitino-server-config.md#apache-grav
|--------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|----------|------------------|
| `location` | The storage location managed by Hadoop
catalog.
| (none) | No | 0.5.0 |
| `default-filesystem-provider` | The default filesystem provider of this
Hadoop catalog if users do not specify the scheme in the URI. Candidate values
are 'builtin-local', 'builtin-hdfs', 's3', 'gcs', 'abs' and 'oss'. Default
value is `builtin-local`. For S3, if we set this value to 's3', we can omit the
prefix 's3a://' in the location. | `builtin-local` | No |
0.7.0-incubating |
-| `filesystem-providers` | The file system providers to add. Users
need to set this configuration to support cloud storage or custom HCFS. For
instance, set it to `s3` or a comma separated string that contains `s3` like
`gs,s3` to support multiple kinds of fileset including `s3`.
| (none) | Yes |
0.7.0-incubating |
+| `filesystem-providers` | The file system providers to add. Users
need to set this configuration to support cloud storage or custom HCFS. For
instance, set it to `s3` or a comma separated string that contains `s3` like
`gs,s3` to support multiple kinds of fileset including `s3`.
| (none) | Yes |
0.7.0-incubating |
| `credential-providers` | The credential provider types, separated by
comma.
| (none) | No | 0.8.0-incubating |
| `filesystem-conn-timeout-secs` | The timeout of getting the file system
using Hadoop FileSystem client instance. Time unit: seconds.
| 6 | No | 0.8.0-incubating |
+| `disable-filesystem-ops` | The configuration to disable file system
operations in the server side. If set to true, the Hadoop catalog in the server
side will not create, drop files or folder when the schema, fileset is created,
dropped.
| false | No | 0.9.0-incubating |
Please refer to [Credential vending](./security/credential-vending.md) for
more details about credential vending.