This is an automated email from the ASF dual-hosted git repository.
jshao pushed a commit to branch branch-0.9
in repository https://gitbox.apache.org/repos/asf/gravitino.git
The following commit(s) were added to refs/heads/branch-0.9 by this push:
new 0a243087ac [#6889] feat(fileset): add multiple locations APIs for
fileset (#6913)
0a243087ac is described below
commit 0a243087ac64df8e4962f35833467181200f29b0
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Mon Apr 14 14:02:43 2025 +0800
[#6889] feat(fileset): add multiple locations APIs for fileset (#6913)
### What changes were proposed in this pull request?
add multiple locations APIs for fileset
### Why are the changes needed?
Fix: #6889
### Does this PR introduce _any_ user-facing change?
No. Nothing is implemented yet; this PR only adds the new API.
### How was this patch tested?
CI pass
Co-authored-by: mchades <[email protected]>
---
.../exceptions/NoSuchLocationNameException.java | 48 ++++++++++
.../java/org/apache/gravitino/file/Fileset.java | 102 ++++++++++++++++++---
.../org/apache/gravitino/file/FilesetCatalog.java | 64 ++++++++++++-
.../catalog/hadoop/HadoopCatalogOperations.java | 20 ++--
.../hadoop/HadoopCatalogPropertiesMetadata.java | 17 ++++
.../hadoop/HadoopFilesetPropertiesMetadata.java | 24 ++---
.../hadoop/HadoopSchemaPropertiesMetadata.java | 18 ++++
.../hadoop/integration/test/HadoopCatalogIT.java | 4 +-
.../client-python/gravitino/api/file/fileset.py | 98 ++++++++++++++++++--
.../gravitino/client/fileset_catalog.py | 51 +++++++++--
.../gravitino/client/generic_fileset.py | 4 +
clients/client-python/gravitino/dto/fileset_dto.py | 4 +
clients/client-python/gravitino/exceptions/base.py | 4 +
13 files changed, 405 insertions(+), 53 deletions(-)
diff --git
a/api/src/main/java/org/apache/gravitino/exceptions/NoSuchLocationNameException.java
b/api/src/main/java/org/apache/gravitino/exceptions/NoSuchLocationNameException.java
new file mode 100644
index 0000000000..9cdba1d445
--- /dev/null
+++
b/api/src/main/java/org/apache/gravitino/exceptions/NoSuchLocationNameException.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.gravitino.exceptions;
+
+import com.google.errorprone.annotations.FormatMethod;
+
+/** Exception thrown when a fileset location name is not found. */
+public class NoSuchLocationNameException extends NotFoundException {
+
+ /**
+ * Constructs a new exception with the specified detail message.
+ *
+ * @param message the detail message.
+ * @param args the arguments to the message.
+ */
+ @FormatMethod
+ public NoSuchLocationNameException(String message, Object... args) {
+ super(message, args);
+ }
+
+ /**
+ * Constructs a new exception with the specified detail message and cause.
+ *
+ * @param cause the cause.
+ * @param message the detail message.
+ * @param args the arguments to the message.
+ */
+ @FormatMethod
+ public NoSuchLocationNameException(Throwable cause, String message,
Object... args) {
+ super(cause, message, args);
+ }
+}
diff --git a/api/src/main/java/org/apache/gravitino/file/Fileset.java
b/api/src/main/java/org/apache/gravitino/file/Fileset.java
index 48cccf78b3..2698ea3e05 100644
--- a/api/src/main/java/org/apache/gravitino/file/Fileset.java
+++ b/api/src/main/java/org/apache/gravitino/file/Fileset.java
@@ -42,26 +42,35 @@ import org.apache.gravitino.tag.SupportsTags;
@Evolving
public interface Fileset extends Auditable {
+ /** The prefix of the location name in the property at the catalog/schema
level. */
+ String PROPERTY_MULTIPLE_LOCATIONS_PREFIX = "location-";
+
/** The prefix of fileset placeholder property */
- String LOCATION_PLACEHOLDER_PREFIX = "placeholder-";
+ String PROPERTY_LOCATION_PLACEHOLDER_PREFIX = "placeholder-";
/**
* The reserved property name for the catalog name placeholder, when
creating a fileset, all
* placeholders as {{catalog}} will be replaced by the catalog name
*/
- String RESERVED_CATALOG_PLACEHOLDER = "placeholder-catalog";
+ String PROPERTY_CATALOG_PLACEHOLDER = "placeholder-catalog";
/**
* The reserved property name for the schema name placeholder, when creating
a fileset, all
* placeholders as {{schema}} will be replaced by the schema name
*/
- String RESERVED_SCHEMA_PLACEHOLDER = "placeholder-schema";
+ String PROPERTY_SCHEMA_PLACEHOLDER = "placeholder-schema";
/**
* The reserved property name for the fileset name placeholder, when
creating a fileset, all
* placeholders as {{fileset}} will be replaced by the fileset name
*/
- String RESERVED_FILESET_PLACEHOLDER = "placeholder-fileset";
+ String PROPERTY_FILESET_PLACEHOLDER = "placeholder-fileset";
+
+ /** The property name for the default location name of the fileset. */
+ String PROPERTY_DEFAULT_LOCATION_NAME = "default-location-name";
+
+ /** The reserved location name to indicate the location name is unknown. */
+ String LOCATION_NAME_UNKNOWN = "unknown";
/** An enum representing the type of the fileset object. */
enum Type {
@@ -92,11 +101,13 @@ public interface Fileset extends Auditable {
Type type();
/**
- * Get the storage location of the file or directory path that is managed by
this fileset object.
+ * Get the unnamed storage location of the file or directory path that is
managed by this fileset
+ * object.
*
* <p>The returned storageLocation can either be the one specified when
creating the fileset
- * object, or the one specified in the catalog / schema level if the fileset
object is created
- * under this catalog / schema.
+ * object (using storageLocation field or storageLocations field), or the
one specified in the
+ * catalog / schema level (using property "location" or properties with
prefix "location-") if the
+ * fileset object is created under this catalog / schema.
*
* <p>The storageLocation in each level can contain placeholders, format as
{{name}}, which will
* be replaced by the corresponding fileset property value when the fileset
object is created. The
@@ -133,10 +144,10 @@ public interface Fileset extends Auditable {
* <p>a. "{schema location}/filesetName" if {schema location} does not
contain any placeholder.
*
* <p>b. "{schema location}" - placeholders in the {schema location} will be
replaced by the
+ * placeholder value specified in the fileset properties.
*
- * <p>5) When both catalog property "location" and schema property
"location" are not specified,
- * and storageLocation specified when creating the fileset object is null,
this situation is
- * illegal.
+ * <p>5) null value - when catalog property "location", schema property
"location",
+ * storageLocation field of fileset, and "unknown" location in
storageLocations are not specified.
*
* <p>For external fileset, the storageLocation can be:
*
@@ -145,7 +156,76 @@ public interface Fileset extends Auditable {
*
* @return The storage location of the fileset object.
*/
- String storageLocation();
+ default String storageLocation() {
+ return storageLocations().get(LOCATION_NAME_UNKNOWN);
+ }
+
+ /**
+ * Get the storage location name and corresponding path of the file or
directory path that is
+ * managed by this fileset object. The key is the name of the storage
location and the value is
+ * the storage location path.
+ *
+ * <p>Each storageLocation in the values can either be the one specified
when creating the fileset
+ * object, or the one specified in the catalog / schema level if the fileset
object is created
+ * under this catalog / schema.
+ *
+ * <p>The "unknown" location name is reserved to indicate the unnamed storage
location of the fileset. It
+ * can be specified in catalog / schema level by the property "location" or
in the fileset level
+ * by the field "storageLocation". Other location names can be specified in
the fileset level by
+ * the key-value pairs in the field "storageLocations", and by
"location-{name}" properties in the
+ * catalog / schema level.
+ *
+ * <p>The storageLocation in each level can contain placeholders, format as
{{name}}, which will
+ * be replaced by the corresponding fileset property value when the fileset
object is created. The
+ * placeholder property in the fileset object is formed as
"placeholder-{{name}}". For example, if
+ * the storageLocation is "file:///path/{{schema}}-{{fileset}}-{{version}}",
and the fileset
+ * object "catalog1.schema1.fileset1" has the property "placeholder-version"
set to "v1", then the
+ * storageLocation will be "file:///path/schema1-fileset1-v1".
+ *
+ * <p>For managed fileset, the storageLocation can be:
+ *
+ * <p>1) The one specified when creating the fileset object, and the
placeholders in the
+ * storageLocation will be replaced by the placeholder value specified in
the fileset properties.
+ *
+ * <p>2) When catalog property "location" is specified but schema property
"location" is not
+ * specified, then the storageLocation will be:
+ *
+ * <p>a. "{catalog location}/schemaName/filesetName" if {catalog location}
does not contain any
+ * placeholder.
+ *
+ * <p>b. "{catalog location}" - placeholders in the {catalog location} will
be replaced by the
+ * placeholder value specified in the fileset properties.
+ *
+ * <p>3) When catalog property "location" is not specified but schema
property "location" is
+ * specified, then the storageLocation will be:
+ *
+ * <p>a. "{schema location}/filesetName" if {schema location} does not
contain any placeholder.
+ *
+ * <p>b. "{schema location}" - placeholders in the {schema location} will be
replaced by the
+ * placeholder value specified in the fileset properties.
+ *
+ * <p>4) When both catalog property "location" and schema property
"location" are specified, then
+ * the storageLocation will be:
+ *
+ * <p>a. "{schema location}/filesetName" if {schema location} does not
contain any placeholder.
+ *
+ * <p>b. "{schema location}" - placeholders in the {schema location} will be
replaced by values
+ * specified in the fileset properties.
+ *
+ * <p>5) When there is no location specified in catalog level, schema level,
storageLocation of
+ * fileset, and storageLocations of fileset at the same time, this situation
is illegal.
+ *
+ * <p>For external fileset, the storageLocation can be:
+ *
+ * <p>1) The one specified when creating the fileset object, and the
placeholders in the
+ * storageLocation will be replaced by the placeholder value specified in
the fileset properties.
+ *
+ * @return The storage locations of the fileset object, the key is the name
of the storage
+ * location and the value is the storage location path.
+ */
+ default Map<String, String> storageLocations() {
+ throw new UnsupportedOperationException("Not implemented");
+ }
/**
* @return The properties of the fileset object. Empty map is returned if no
properties are set.
diff --git a/api/src/main/java/org/apache/gravitino/file/FilesetCatalog.java
b/api/src/main/java/org/apache/gravitino/file/FilesetCatalog.java
index a4ba788eef..448b1990a7 100644
--- a/api/src/main/java/org/apache/gravitino/file/FilesetCatalog.java
+++ b/api/src/main/java/org/apache/gravitino/file/FilesetCatalog.java
@@ -18,12 +18,16 @@
*/
package org.apache.gravitino.file;
+import static org.apache.gravitino.file.Fileset.LOCATION_NAME_UNKNOWN;
+
+import com.google.common.collect.ImmutableMap;
import java.util.Map;
import org.apache.gravitino.NameIdentifier;
import org.apache.gravitino.Namespace;
import org.apache.gravitino.annotation.Evolving;
import org.apache.gravitino.exceptions.FilesetAlreadyExistsException;
import org.apache.gravitino.exceptions.NoSuchFilesetException;
+import org.apache.gravitino.exceptions.NoSuchLocationNameException;
import org.apache.gravitino.exceptions.NoSuchSchemaException;
import org.apache.gravitino.file.FilesetChange.RenameFileset;
@@ -68,7 +72,7 @@ public interface FilesetCatalog {
}
/**
- * Create a fileset metadata in the catalog.
+ * Create a fileset metadata with a default location in the catalog.
*
* <p>If the type of the fileset object is "MANAGED", the underlying
storageLocation can be null,
* and Gravitino will manage the storage location based on the location of
the schema.
@@ -84,13 +88,44 @@ public interface FilesetCatalog {
* @throws NoSuchSchemaException If the schema does not exist.
* @throws FilesetAlreadyExistsException If the fileset already exists.
*/
- Fileset createFileset(
+ default Fileset createFileset(
NameIdentifier ident,
String comment,
Fileset.Type type,
String storageLocation,
Map<String, String> properties)
- throws NoSuchSchemaException, FilesetAlreadyExistsException;
+ throws NoSuchSchemaException, FilesetAlreadyExistsException {
+ return createMultipleLocationFileset(
+ ident,
+ comment,
+ type,
+ storageLocation == null
+ ? ImmutableMap.of()
+ : ImmutableMap.of(LOCATION_NAME_UNKNOWN, storageLocation),
+ properties);
+ }
+
+ /**
+ * Create a fileset metadata with multiple storage locations in the catalog.
+ *
+ * @param ident A fileset identifier.
+ * @param comment The comment of the fileset.
+ * @param type The type of the fileset.
+ * @param storageLocations The location names and storage locations of the
fileset.
+ * @param properties The properties of the fileset.
+ * @return The created fileset metadata
+ * @throws NoSuchSchemaException If the schema does not exist.
+ * @throws FilesetAlreadyExistsException If the fileset already exists.
+ */
+ default Fileset createMultipleLocationFileset(
+ NameIdentifier ident,
+ String comment,
+ Fileset.Type type,
+ Map<String, String> storageLocations,
+ Map<String, String> properties)
+ throws NoSuchSchemaException, FilesetAlreadyExistsException {
+ throw new UnsupportedOperationException("Not implemented");
+ }
/**
* Apply the {@link FilesetChange change} to a fileset in the catalog.
@@ -121,14 +156,33 @@ public interface FilesetCatalog {
*/
boolean dropFileset(NameIdentifier ident);
+ /**
+ * Get the actual location of a file or directory based on the default
storage location of Fileset
+ * and the sub path.
+ *
+ * @param ident A fileset identifier.
+ * @param subPath The sub path to the file or directory.
+ * @return The actual location of the file or directory.
+ * @throws NoSuchFilesetException If the fileset does not exist.
+ */
+ default String getFileLocation(NameIdentifier ident, String subPath)
+ throws NoSuchFilesetException {
+ return getFileLocation(ident, subPath, null);
+ }
+
/**
* Get the actual location of a file or directory based on the storage
location of Fileset and the
- * sub path.
+ * sub path by the location name.
*
* @param ident A fileset identifier.
* @param subPath The sub path to the file or directory.
+ * @param locationName The location name. If null, the default location will
be used.
* @return The actual location of the file or directory.
* @throws NoSuchFilesetException If the fileset does not exist.
+ * @throws NoSuchLocationNameException If the location name does not exist.
*/
- String getFileLocation(NameIdentifier ident, String subPath) throws
NoSuchFilesetException;
+ default String getFileLocation(NameIdentifier ident, String subPath, String
locationName)
+ throws NoSuchFilesetException, NoSuchLocationNameException {
+ throw new UnsupportedOperationException("Not implemented");
+ }
}
diff --git
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
index ce273ecfe9..aebf5b5de4 100644
---
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
+++
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java
@@ -19,10 +19,10 @@
package org.apache.gravitino.catalog.hadoop;
import static org.apache.gravitino.connector.BaseCatalog.CATALOG_BYPASS_PREFIX;
-import static org.apache.gravitino.file.Fileset.LOCATION_PLACEHOLDER_PREFIX;
-import static org.apache.gravitino.file.Fileset.RESERVED_CATALOG_PLACEHOLDER;
-import static org.apache.gravitino.file.Fileset.RESERVED_FILESET_PLACEHOLDER;
-import static org.apache.gravitino.file.Fileset.RESERVED_SCHEMA_PLACEHOLDER;
+import static org.apache.gravitino.file.Fileset.PROPERTY_CATALOG_PLACEHOLDER;
+import static org.apache.gravitino.file.Fileset.PROPERTY_FILESET_PLACEHOLDER;
+import static
org.apache.gravitino.file.Fileset.PROPERTY_LOCATION_PLACEHOLDER_PREFIX;
+import static org.apache.gravitino.file.Fileset.PROPERTY_SCHEMA_PLACEHOLDER;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
@@ -776,17 +776,19 @@ public class HadoopCatalogOperations extends
ManagedSchemaOperations
Map<String, String> placeholderMapping = new HashMap<>();
properties.forEach(
(k, v) -> {
- if (k.startsWith(LOCATION_PLACEHOLDER_PREFIX)) {
-
placeholderMapping.put(k.substring(LOCATION_PLACEHOLDER_PREFIX.length()), v);
+ if (k.startsWith(PROPERTY_LOCATION_PLACEHOLDER_PREFIX)) {
+
placeholderMapping.put(k.substring(PROPERTY_LOCATION_PLACEHOLDER_PREFIX.length()),
v);
}
});
placeholderMapping.put(
-
RESERVED_CATALOG_PLACEHOLDER.substring(LOCATION_PLACEHOLDER_PREFIX.length()),
+
PROPERTY_CATALOG_PLACEHOLDER.substring(PROPERTY_LOCATION_PLACEHOLDER_PREFIX.length()),
catalogInfo.name());
placeholderMapping.put(
-
RESERVED_SCHEMA_PLACEHOLDER.substring(LOCATION_PLACEHOLDER_PREFIX.length()),
schemaName);
+
PROPERTY_SCHEMA_PLACEHOLDER.substring(PROPERTY_LOCATION_PLACEHOLDER_PREFIX.length()),
+ schemaName);
placeholderMapping.put(
-
RESERVED_FILESET_PLACEHOLDER.substring(LOCATION_PLACEHOLDER_PREFIX.length()),
filesetName);
+
PROPERTY_FILESET_PLACEHOLDER.substring(PROPERTY_LOCATION_PLACEHOLDER_PREFIX.length()),
+ filesetName);
// case 2: storageLocation is not empty and contains placeholder
if (StringUtils.isNotBlank(storageLocation)) {
diff --git
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
index c7c723fe8f..f8dc025558 100644
---
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
+++
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogPropertiesMetadata.java
@@ -20,6 +20,8 @@ package org.apache.gravitino.catalog.hadoop;
import static
org.apache.gravitino.catalog.hadoop.authentication.kerberos.KerberosConfig.KERBEROS_PROPERTY_ENTRIES;
import static
org.apache.gravitino.catalog.hadoop.fs.Constants.BUILTIN_LOCAL_FS_PROVIDER;
+import static org.apache.gravitino.file.Fileset.LOCATION_NAME_UNKNOWN;
+import static
org.apache.gravitino.file.Fileset.PROPERTY_MULTIPLE_LOCATIONS_PREFIX;
import com.google.common.collect.ImmutableMap;
import java.util.Map;
@@ -67,6 +69,21 @@ public class HadoopCatalogPropertiesMetadata extends
BaseCatalogPropertiesMetada
false /* immutable */,
null,
false /* hidden */))
+ .put(
+ PROPERTY_MULTIPLE_LOCATIONS_PREFIX + LOCATION_NAME_UNKNOWN,
+ PropertyEntry.stringReservedPropertyEntry(
+ PROPERTY_MULTIPLE_LOCATIONS_PREFIX + LOCATION_NAME_UNKNOWN,
+ "The storage location equivalent to property 'location'",
+ true /* hidden */))
+ .put(
+ PROPERTY_MULTIPLE_LOCATIONS_PREFIX,
+ PropertyEntry.stringImmutablePropertyPrefixEntry(
+ PROPERTY_MULTIPLE_LOCATIONS_PREFIX,
+ "The prefix of the location name",
+ false /* required */,
+ null /* default value */,
+ false /* hidden */,
+ false /* reserved */))
.put(
FILESYSTEM_PROVIDERS,
PropertyEntry.stringOptionalPropertyEntry(
diff --git
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopFilesetPropertiesMetadata.java
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopFilesetPropertiesMetadata.java
index 2a43de62e1..6ff885ac83 100644
---
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopFilesetPropertiesMetadata.java
+++
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopFilesetPropertiesMetadata.java
@@ -18,10 +18,10 @@
*/
package org.apache.gravitino.catalog.hadoop;
-import static org.apache.gravitino.file.Fileset.LOCATION_PLACEHOLDER_PREFIX;
-import static org.apache.gravitino.file.Fileset.RESERVED_CATALOG_PLACEHOLDER;
-import static org.apache.gravitino.file.Fileset.RESERVED_FILESET_PLACEHOLDER;
-import static org.apache.gravitino.file.Fileset.RESERVED_SCHEMA_PLACEHOLDER;
+import static org.apache.gravitino.file.Fileset.PROPERTY_CATALOG_PLACEHOLDER;
+import static org.apache.gravitino.file.Fileset.PROPERTY_FILESET_PLACEHOLDER;
+import static
org.apache.gravitino.file.Fileset.PROPERTY_LOCATION_PLACEHOLDER_PREFIX;
+import static org.apache.gravitino.file.Fileset.PROPERTY_SCHEMA_PLACEHOLDER;
import com.google.common.collect.ImmutableMap;
import java.util.Map;
@@ -38,27 +38,27 @@ public class HadoopFilesetPropertiesMetadata extends
BasePropertiesMetadata {
ImmutableMap.Builder<String, PropertyEntry<?>> builder =
ImmutableMap.builder();
builder
.put(
- RESERVED_CATALOG_PLACEHOLDER,
+ PROPERTY_CATALOG_PLACEHOLDER,
PropertyEntry.stringReservedPropertyEntry(
- RESERVED_CATALOG_PLACEHOLDER,
+ PROPERTY_CATALOG_PLACEHOLDER,
"The placeholder will be replaced to catalog name in the
location",
true /* hidden */))
.put(
- RESERVED_SCHEMA_PLACEHOLDER,
+ PROPERTY_SCHEMA_PLACEHOLDER,
PropertyEntry.stringReservedPropertyEntry(
- RESERVED_SCHEMA_PLACEHOLDER,
+ PROPERTY_SCHEMA_PLACEHOLDER,
"The placeholder will be replaced to schema name in the
location",
true /* hidden */))
.put(
- RESERVED_FILESET_PLACEHOLDER,
+ PROPERTY_FILESET_PLACEHOLDER,
PropertyEntry.stringReservedPropertyEntry(
- RESERVED_FILESET_PLACEHOLDER,
+ PROPERTY_FILESET_PLACEHOLDER,
"The placeholder will be replaced to fileset name in the
location",
true /* hidden */))
.put(
- LOCATION_PLACEHOLDER_PREFIX,
+ PROPERTY_LOCATION_PLACEHOLDER_PREFIX,
PropertyEntry.stringImmutablePropertyPrefixEntry(
- LOCATION_PLACEHOLDER_PREFIX,
+ PROPERTY_LOCATION_PLACEHOLDER_PREFIX,
"The prefix of fileset placeholder property",
false /* required */,
null /* default value */,
diff --git
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopSchemaPropertiesMetadata.java
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopSchemaPropertiesMetadata.java
index 9028cc48f3..5cc0940292 100644
---
a/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopSchemaPropertiesMetadata.java
+++
b/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopSchemaPropertiesMetadata.java
@@ -18,6 +18,9 @@
*/
package org.apache.gravitino.catalog.hadoop;
+import static org.apache.gravitino.file.Fileset.LOCATION_NAME_UNKNOWN;
+import static
org.apache.gravitino.file.Fileset.PROPERTY_MULTIPLE_LOCATIONS_PREFIX;
+
import com.google.common.collect.ImmutableMap;
import java.util.Map;
import org.apache.gravitino.catalog.hadoop.authentication.AuthenticationConfig;
@@ -48,6 +51,21 @@ public class HadoopSchemaPropertiesMetadata extends
BasePropertiesMetadata {
true /* immutable */,
null,
false /* hidden */))
+ .put(
+ PROPERTY_MULTIPLE_LOCATIONS_PREFIX + LOCATION_NAME_UNKNOWN,
+ PropertyEntry.stringReservedPropertyEntry(
+ PROPERTY_MULTIPLE_LOCATIONS_PREFIX + LOCATION_NAME_UNKNOWN,
+ "The storage location equivalent to property 'location'",
+ true /* hidden */))
+ .put(
+ PROPERTY_MULTIPLE_LOCATIONS_PREFIX,
+ PropertyEntry.stringImmutablePropertyPrefixEntry(
+ PROPERTY_MULTIPLE_LOCATIONS_PREFIX,
+ "The prefix of the location name",
+ false /* required */,
+ null /* default value */,
+ false /* hidden */,
+ false /* reserved */))
.putAll(KerberosConfig.KERBEROS_PROPERTY_ENTRIES)
.putAll(AuthenticationConfig.AUTHENTICATION_PROPERTY_ENTRIES)
.putAll(CredentialConfig.CREDENTIAL_PROPERTY_ENTRIES)
diff --git
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopCatalogIT.java
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopCatalogIT.java
index 05c166df13..1f358813d9 100644
---
a/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopCatalogIT.java
+++
b/catalogs/catalog-hadoop/src/test/java/org/apache/gravitino/catalog/hadoop/integration/test/HadoopCatalogIT.java
@@ -18,7 +18,7 @@
*/
package org.apache.gravitino.catalog.hadoop.integration.test;
-import static org.apache.gravitino.file.Fileset.LOCATION_PLACEHOLDER_PREFIX;
+import static
org.apache.gravitino.file.Fileset.PROPERTY_LOCATION_PLACEHOLDER_PREFIX;
import static org.apache.gravitino.file.Fileset.Type.MANAGED;
import com.google.common.collect.ImmutableMap;
@@ -261,7 +261,7 @@ public class HadoopCatalogIT extends BaseIT {
// create fileset with placeholder in storage location
String filesetName = "test_alter_fileset";
String storageLocation = storageLocation(filesetName) + "/{{user}}";
- String placeholderKey = LOCATION_PLACEHOLDER_PREFIX + "user";
+ String placeholderKey = PROPERTY_LOCATION_PLACEHOLDER_PREFIX + "user";
createFileset(
filesetName, "comment", MANAGED, storageLocation,
ImmutableMap.of(placeholderKey, "test"));
diff --git a/clients/client-python/gravitino/api/file/fileset.py
b/clients/client-python/gravitino/api/file/fileset.py
index 25f0f4d4f9..71a7250664 100644
--- a/clients/client-python/gravitino/api/file/fileset.py
+++ b/clients/client-python/gravitino/api/file/fileset.py
@@ -34,6 +34,30 @@ class Fileset(Auditable):
with FilesetCatalog should implement this interface.
"""
+ PROPERTY_MULTIPLE_LOCATIONS_PREFIX = "location-"
+ """The prefix of the location name in the property at the catalog/schema
level."""
+
+ PROPERTY_DEFAULT_LOCATION_NAME = "default-location-name"
+ """The property name for the default location name of the fileset."""
+
+ LOCATION_NAME_UNKNOWN = "unknown"
+ """The reserved location name to indicate the location name is unknown."""
+
+ PROPERTY_LOCATION_PLACEHOLDER_PREFIX = "placeholder-"
+ """The prefix of fileset placeholder property"""
+
+ PROPERTY_CATALOG_PLACEHOLDER = "placeholder-catalog"
+ """The reserved property name for the catalog name placeholder, when
creating a fileset, all
+ placeholders as {{catalog}} will be replaced by the catalog name"""
+
+ PROPERTY_SCHEMA_PLACEHOLDER = "placeholder-schema"
+ """The reserved property name for the schema name placeholder, when
creating a fileset, all
+ placeholders as {{schema}} will be replaced by the schema name"""
+
+ PROPERTY_FILESET_PLACEHOLDER = "placeholder-fileset"
+ """The reserved property name for the fileset name placeholder, when
creating a fileset, all
+ placeholders as {{fileset}} will be replaced by the fileset name"""
+
class Type(Enum):
"""An enum representing the type of the fileset object."""
@@ -69,13 +93,13 @@ class Fileset(Auditable):
"""
pass
- @abstractmethod
def storage_location(self) -> str:
- """Get the storage location of the file or directory path managed by
this fileset object.
+ """Get the unnamed storage location of the file or directory path
managed by this fileset object.
- The returned storageLocation can be either the one specified when
creating the fileset object,
- or the one specified at the catalog/schema level if the fileset object
is created under this
- catalog/schema.
+ The returned storageLocation can be either the one specified when
creating the fileset object
+ (using storageLocation field or storageLocations field), or the one
specified at the catalog/schema
+ level (using property "location" or properties with prefix
"location-") if the fileset object is
+ created under this catalog/schema.
The storageLocation at each level can contain placeholders, formatted
as {{name}}, which will
be replaced by the corresponding fileset property value when the
fileset object is created.
@@ -107,9 +131,8 @@ class Fileset(Auditable):
b. "{schema location}" - placeholders in {schema location} will be
replaced by values
specified in fileset properties
- 5) When both catalog property "location" and schema property
"location" are not specified,
- and storageLocation specified when creating the fileset object is
null, this situation
- is illegal.
+ 5) null value - when catalog property "location", schema property
"location", storageLocation
+ field of fileset, and "unknown" location in storageLocations are not
specified.
For external fileset, the storageLocation can be:
1) The one specified when creating the fileset object, and the
placeholders in the
@@ -118,6 +141,65 @@ class Fileset(Auditable):
Returns:
str: The storage location of the fileset object.
"""
+ return self.storage_locations().get(self.LOCATION_NAME_UNKNOWN)
+
+ @abstractmethod
+ def storage_locations(self) -> Dict[str, str]:
+ """
+ Get the storage location name and corresponding path of the file or
directory path that is
+ managed by this fileset object. The key is the name of the storage
location and the value is
+ the storage location path.
+
+ Each storageLocation in the values can either be the one specified
when creating the fileset
+ object, or the one specified in the catalog / schema level if the
fileset object is created
+ under this catalog / schema.
+
+ The "unknown" location name is reserved to indicate the unnamed
storage location of the fileset.
+ It can be specified in catalog / schema level by the property
"location" or in the fileset level
+ by the field "storageLocation". Other location names can be specified
in the fileset level by the
+ key-value pairs in the field "storageLocations", and by
"location-{name}" properties in the
+ catalog / schema level.
+
+ The storageLocation at each level can contain placeholders, formatted
as {{name}}, which will
+ be replaced by the corresponding fileset property value when the
fileset object is created.
+ The placeholder property in the fileset object is formed as
"placeholder-{{name}}". For example,
+ if the storageLocation is
"file:///path/{{schema}}-{{fileset}}-{{version}}", and the fileset
+ object "catalog1.schema1.fileset1" has the property
"placeholder-version" set to "v1",
+ then the storageLocation will be "file:///path/schema1-fileset1-v1".
+
+ For managed fileset, the storageLocation can be:
+
+ 1) The one specified when creating the fileset object, and the
placeholders in the
+ storageLocation will be replaced by the placeholder value specified
in the fileset properties.
+
+ 2) When catalog property "location" is specified but schema property
"location" is not specified,
+ then the storageLocation will be:
+ a. "{catalog location}/schemaName/filesetName" - if {catalog
location} has no placeholders
+ b. "{catalog location}" - placeholders in {catalog location} will
be replaced by values
+ specified in fileset properties
+
+ 3) When catalog property "location" is not specified but schema
property "location" is specified,
+ then the storageLocation will be:
+ a. "{schema location}/filesetName" - if {schema location} has no
placeholders
+ b. "{schema location}" - placeholders in {schema location} will be
replaced by values
+ specified in fileset properties
+
+ 4) When both catalog property "location" and schema property
"location" are specified,
+ then the storageLocation will be:
+ a. "{schema location}/filesetName" - if {schema location} has no
placeholders
+ b. "{schema location}" - placeholders in {schema location} will be
replaced by values
+ specified in fileset properties
+
+ 5) When there is no location specified in catalog level, schema level,
storageLocation of fileset,
+ and storageLocations of fileset at the same time, this situation is
illegal.
+
+ For external fileset, the storageLocation can be:
+ 1) The one specified when creating the fileset object, and the
placeholders in the
+ storageLocation will be replaced by the placeholder value specified
in the fileset properties.
+
+ :return: The storage locations of the fileset object, the key is the
name of the storage
+ location and the value is the storage location path.
+ """
pass
@abstractmethod
diff --git a/clients/client-python/gravitino/client/fileset_catalog.py
b/clients/client-python/gravitino/client/fileset_catalog.py
index 39a095ba9a..56504a5dc4 100644
--- a/clients/client-python/gravitino/client/fileset_catalog.py
+++ b/clients/client-python/gravitino/client/fileset_catalog.py
@@ -18,27 +18,28 @@
import logging
from typing import List, Dict
+from gravitino.dto.requests.fileset_create_request import FilesetCreateRequest
+
from gravitino.api.catalog import Catalog
-from gravitino.api.credential.supports_credentials import SupportsCredentials
from gravitino.api.credential.credential import Credential
+from gravitino.api.credential.supports_credentials import SupportsCredentials
from gravitino.api.file.fileset import Fileset
from gravitino.api.file.fileset_change import FilesetChange
from gravitino.audit.caller_context import CallerContextHolder, CallerContext
from gravitino.client.base_schema_catalog import BaseSchemaCatalog
from gravitino.client.generic_fileset import GenericFileset
from gravitino.dto.audit_dto import AuditDTO
-from gravitino.dto.requests.fileset_create_request import FilesetCreateRequest
from gravitino.dto.requests.fileset_update_request import FilesetUpdateRequest
from gravitino.dto.requests.fileset_updates_request import
FilesetUpdatesRequest
from gravitino.dto.responses.drop_response import DropResponse
from gravitino.dto.responses.entity_list_response import EntityListResponse
from gravitino.dto.responses.file_location_response import FileLocationResponse
from gravitino.dto.responses.fileset_response import FilesetResponse
+from gravitino.exceptions.handlers.fileset_error_handler import
FILESET_ERROR_HANDLER
from gravitino.name_identifier import NameIdentifier
from gravitino.namespace import Namespace
-from gravitino.utils import HTTPClient
from gravitino.rest.rest_utils import encode_string
-from gravitino.exceptions.handlers.fileset_error_handler import
FILESET_ERROR_HANDLER
+from gravitino.utils import HTTPClient
logger = logging.getLogger(__name__)
@@ -158,6 +159,7 @@ class FilesetCatalog(BaseSchemaCatalog,
SupportsCredentials):
Returns:
The created fileset metadata
"""
+ # todo: call create_multiple_location_fileset if multiple storage
locations are supported
self.check_fileset_name_identifier(ident)
full_namespace = self._get_fileset_full_namespace(ident.namespace())
@@ -180,6 +182,32 @@ class FilesetCatalog(BaseSchemaCatalog,
SupportsCredentials):
return GenericFileset(fileset_resp.fileset(), self.rest_client,
full_namespace)
+ def create_multiple_location_fileset(
+ self,
+ ident: NameIdentifier,
+ comment: str,
+ fileset_type: Fileset.Type,
+ storage_locations: Dict[str, str],
+ properties: Dict[str, str],
+ ):
+ """Create a fileset metadata in the catalog with multiple storage
locations.
+
+ Args:
+ ident: A fileset identifier, which should be "schema.fileset"
format.
+ comment: The comment of the fileset.
+ fileset_type: The type of the fileset.
+ storage_locations: The location names and storage locations of the
fileset.
+ properties: The properties of the fileset.
+
+ Raises:
+ NoSuchSchemaException If the schema does not exist.
+ FilesetAlreadyExistsException If the fileset already exists.
+
+ Returns:
+ The created fileset metadata
+ """
+ raise NotImplementedError("Multiple storage locations are not
supported yet")
+
def alter_fileset(self, ident: NameIdentifier, *changes) -> Fileset:
"""Update a fileset metadata in the catalog.
@@ -239,12 +267,22 @@ class FilesetCatalog(BaseSchemaCatalog,
SupportsCredentials):
return drop_resp.dropped()
- def get_file_location(self, ident: NameIdentifier, sub_path: str) -> str:
- """Get the actual location of a file or directory based on the storage
location of Fileset and the sub path.
+ def get_file_location(
+ self,
+ ident: NameIdentifier,
+ sub_path: str,
+ location_name: str = None,
+ ) -> str:
+ """Get the actual location of a file or directory based on the
+ specified storage location of Fileset and the sub path.
Args:
ident: A fileset identifier, which should be "schema.fileset"
format.
sub_path: The sub path of the file or directory.
+ location_name: The location name of the fileset.
+
+ Raises:
+ NoSuchLocationNameException If the location name does not exist.
Returns:
The actual location of the file or directory.
@@ -254,6 +292,7 @@ class FilesetCatalog(BaseSchemaCatalog,
SupportsCredentials):
full_namespace = self._get_fileset_full_namespace(ident.namespace())
try:
caller_context: CallerContext = CallerContextHolder.get()
+ # todo: add location name to the request
params = {"sub_path": encode_string(sub_path)}
resp = self.rest_client.get(
diff --git a/clients/client-python/gravitino/client/generic_fileset.py
b/clients/client-python/gravitino/client/generic_fileset.py
index 52a2975f98..c3b99dd497 100644
--- a/clients/client-python/gravitino/client/generic_fileset.py
+++ b/clients/client-python/gravitino/client/generic_fileset.py
@@ -59,6 +59,10 @@ class GenericFileset(Fileset, SupportsCredentials):
def storage_location(self) -> str:
return self._fileset.storage_location()
+ def storage_locations(self) -> Dict[str, str]:
+ # todo: implement this method and remove storage_location method
+ pass
+
def comment(self) -> Optional[str]:
return self._fileset.comment()
diff --git a/clients/client-python/gravitino/dto/fileset_dto.py
b/clients/client-python/gravitino/dto/fileset_dto.py
index 528ae3c74a..d098cd968b 100644
--- a/clients/client-python/gravitino/dto/fileset_dto.py
+++ b/clients/client-python/gravitino/dto/fileset_dto.py
@@ -46,6 +46,10 @@ class FilesetDTO(Fileset, DataClassJsonMixin):
def storage_location(self) -> str:
return self._storage_location
+ def storage_locations(self) -> Dict[str, str]:
+ # todo: implement this method and remove storage_location method
+ pass
+
def comment(self) -> Optional[str]:
return self._comment
diff --git a/clients/client-python/gravitino/exceptions/base.py
b/clients/client-python/gravitino/exceptions/base.py
index f742381214..fed9585659 100644
--- a/clients/client-python/gravitino/exceptions/base.py
+++ b/clients/client-python/gravitino/exceptions/base.py
@@ -61,6 +61,10 @@ class NoSuchFilesetException(NotFoundException):
"""Exception thrown when a file with specified name is not existed."""
+class NoSuchLocationNameException(NotFoundException):
+ """Exception thrown when a fileset location name is not found."""
+
+
class NoSuchCredentialException(NotFoundException):
"""Exception thrown when a credential with specified credential type is
not existed."""