jerryshao commented on code in PR #5020:
URL: https://github.com/apache/gravitino/pull/5020#discussion_r1796584976
##########
catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java:
##########
@@ -119,32 +135,59 @@ public void initialize(
Map<String, String> config, CatalogInfo info, HasPropertyMetadata
propertiesMetadata)
throws RuntimeException {
this.propertiesMetadata = propertiesMetadata;
+ this.catalogInfo = info;
+
// Initialize Hadoop Configuration.
this.conf = config;
- this.hadoopConf = new Configuration();
- this.catalogInfo = info;
+
+ hadoopConf = new Configuration();
Map<String, String> bypassConfigs =
- config.entrySet().stream()
+ conf.entrySet().stream()
.filter(e -> e.getKey().startsWith(CATALOG_BYPASS_PREFIX))
.collect(
Collectors.toMap(
e -> e.getKey().substring(CATALOG_BYPASS_PREFIX.length()),
Map.Entry::getValue));
bypassConfigs.forEach(hadoopConf::set);
+ this.bypassConfigs = bypassConfigs;
+
+ initPluginFileSystem(config);
+
String catalogLocation =
(String)
propertiesMetadata
.catalogPropertiesMetadata()
.getOrDefault(config,
HadoopCatalogPropertiesMetadata.LOCATION);
- conf.forEach(hadoopConf::set);
-
this.catalogStorageLocation =
StringUtils.isNotBlank(catalogLocation)
? Optional.of(catalogLocation).map(Path::new)
: Optional.empty();
}
+ private void initPluginFileSystem(Map<String, String> config) {
+ String fileSystemProviders =
+ (String)
+ propertiesMetadata
+ .catalogPropertiesMetadata()
+ .getOrDefault(config,
HadoopCatalogPropertiesMetadata.FILESYSTEM_PROVIDER);
Review Comment:
providers or provider?
##########
catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java:
##########
@@ -75,8 +77,12 @@ public class HadoopCatalogOperations implements
CatalogOperations, SupportsSchem
private static final String SCHEMA_DOES_NOT_EXIST_MSG = "Schema %s does not
exist";
private static final String FILESET_DOES_NOT_EXIST_MSG = "Fileset %s does
not exist";
private static final String SLASH = "/";
+ public static final String DEFAULT_FS = "fs.defaultFS";
+ private static final String LOCAL_FILE_SCHEMA = "file";
+ public static final String LOCAL_FILE_PATH = "file:///";
Review Comment:
Move the definition of public field on top of private ones.
##########
catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java:
##########
@@ -119,32 +135,59 @@ public void initialize(
Map<String, String> config, CatalogInfo info, HasPropertyMetadata
propertiesMetadata)
throws RuntimeException {
this.propertiesMetadata = propertiesMetadata;
+ this.catalogInfo = info;
+
// Initialize Hadoop Configuration.
this.conf = config;
- this.hadoopConf = new Configuration();
- this.catalogInfo = info;
+
+ hadoopConf = new Configuration();
Map<String, String> bypassConfigs =
- config.entrySet().stream()
+ conf.entrySet().stream()
.filter(e -> e.getKey().startsWith(CATALOG_BYPASS_PREFIX))
.collect(
Collectors.toMap(
e -> e.getKey().substring(CATALOG_BYPASS_PREFIX.length()),
Map.Entry::getValue));
bypassConfigs.forEach(hadoopConf::set);
+ this.bypassConfigs = bypassConfigs;
+
+ initPluginFileSystem(config);
Review Comment:
`initializeFileSystemProviders`
##########
catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java:
##########
@@ -119,32 +135,59 @@ public void initialize(
Map<String, String> config, CatalogInfo info, HasPropertyMetadata
propertiesMetadata)
throws RuntimeException {
this.propertiesMetadata = propertiesMetadata;
+ this.catalogInfo = info;
+
// Initialize Hadoop Configuration.
this.conf = config;
- this.hadoopConf = new Configuration();
- this.catalogInfo = info;
+
+ hadoopConf = new Configuration();
Review Comment:
Do we still need this `hadoopConf`?
##########
catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java:
##########
@@ -742,4 +785,44 @@ private boolean checkSingleFile(Fileset fileset) {
fileset.name());
}
}
+
+ static FileSystem getFileSystem(Path path, Map<String, String> config)
throws IOException {
+ Map<String, String> newConfig = Maps.newHashMap(config);
+ String scheme;
+ Path fsPath;
+ if (path != null) {
+ scheme = path.toUri().getScheme();
+ if (scheme == null) {
+ scheme = LOCAL_FILE_SCHEMA;
Review Comment:
I think if the sheme is null, then it should be the one got from
`DEFAULT_FS`, not the local file system. Besides, your assumption is that user
have to set `fs.defaultFS` is config, right?
##########
catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java:
##########
@@ -34,6 +34,15 @@ public class HadoopCatalogPropertiesMetadata extends
BaseCatalogPropertiesMetada
// If not, users have to specify the storage location in the Schema or
Fileset level.
public static final String LOCATION = "location";
+ /**
+ * The implementation class name of the {@link FileSystemProvider} to be
used by the catalog.
+ * Gravitino supports LocalFileSystem and HDFS by default. Users can
implement their own by
+ * extending {@link FileSystemProvider} and specify the class name here.
+ *
+ * <p>The value can be 'xxxx.yyy.FileSystemProvider1,
xxxx.yyy.FileSystemProvider2'.
+ */
+ public static final String FILESYSTEM_PROVIDER = "filesystem.providers";
Review Comment:
Better to use "-", not "." for properties.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]