This is an automated email from the ASF dual-hosted git repository.
danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 43b4852a1b1 [HUDI-6350] Allow athena to use the metadata table (#8929)
43b4852a1b1 is described below
commit 43b4852a1b1fdf5ef28acc44f1c5a1268ee6fd88
Author: Nicolas Paris <[email protected]>
AuthorDate: Fri Jun 16 04:18:44 2023 +0200
[HUDI-6350] Allow athena to use the metadata table (#8929)
---
.../org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java | 10 ++++++++++
.../org/apache/hudi/config/GlueCatalogSyncClientConfig.java | 7 +++++++
2 files changed, 17 insertions(+)
diff --git
a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
index 77b2f26956b..3af222f5fc3 100644
---
a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
+++
b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
@@ -72,6 +72,7 @@ import java.util.stream.Collectors;
import static org.apache.hudi.aws.utils.S3Utils.s3aToS3;
import static org.apache.hudi.common.util.MapUtils.containsAll;
import static org.apache.hudi.common.util.MapUtils.isNullOrEmpty;
+import static
org.apache.hudi.config.GlueCatalogSyncClientConfig.GLUE_METADATA_FILE_LISTING;
import static
org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE;
import static
org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE;
import static org.apache.hudi.hive.util.HiveSchemaUtil.getPartitionKeyType;
@@ -91,16 +92,23 @@ public class AWSGlueCatalogSyncClient extends
HoodieSyncClient {
private static final Logger LOG =
LoggerFactory.getLogger(AWSGlueCatalogSyncClient.class);
private static final int MAX_PARTITIONS_PER_REQUEST = 100;
private static final long BATCH_REQUEST_SLEEP_MILLIS = 1000L;
+ /**
+ * Athena v2/v3 table property controlling whether the Hudi metadata table is used for file listing.
+ * See https://docs.aws.amazon.com/athena/latest/ug/querying-hudi.html
+ */
+ private static final String ENABLE_MDT_LISTING =
"hudi.metadata-listing-enabled";
private final AWSGlue awsGlue;
private final String databaseName;
private final Boolean skipTableArchive;
+ private final String enableMetadataTable;
public AWSGlueCatalogSyncClient(HiveSyncConfig config) {
super(config);
this.awsGlue = AWSGlueClientBuilder.standard().build();
this.databaseName = config.getStringOrDefault(META_SYNC_DATABASE_NAME);
this.skipTableArchive =
config.getBooleanOrDefault(GlueCatalogSyncClientConfig.GLUE_SKIP_TABLE_ARCHIVE);
+ this.enableMetadataTable =
Boolean.toString(config.getBoolean(GLUE_METADATA_FILE_LISTING)).toUpperCase();
}
@Override
@@ -233,6 +241,7 @@ public class AWSGlueCatalogSyncClient extends
HoodieSyncClient {
@Override
public boolean updateTableProperties(String tableName, Map<String, String>
tableProperties) {
try {
+ tableProperties.put(ENABLE_MDT_LISTING, enableMetadataTable);
return updateTableParameters(awsGlue, databaseName, tableName,
tableProperties, skipTableArchive);
} catch (Exception e) {
throw new HoodieGlueSyncException("Fail to update properties for table "
+ tableId(databaseName, tableName), e);
@@ -355,6 +364,7 @@ public class AWSGlueCatalogSyncClient extends
HoodieSyncClient {
if (!config.getBoolean(HIVE_CREATE_MANAGED_TABLE)) {
params.put("EXTERNAL", "TRUE");
}
+ params.put(ENABLE_MDT_LISTING, this.enableMetadataTable);
params.putAll(tableProperties);
try {
diff --git
a/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java
b/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java
index 298d04944d9..efffae5bd89 100644
---
a/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java
+++
b/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java
@@ -39,4 +39,11 @@ public class GlueCatalogSyncClientConfig extends
HoodieConfig {
.markAdvanced()
.sinceVersion("0.14.0")
.withDocumentation("Glue catalog sync based client will skip archiving
the table version if this config is set to true");
+
+ public static final ConfigProperty<Boolean> GLUE_METADATA_FILE_LISTING =
ConfigProperty
+ .key(GLUE_CLIENT_PROPERTY_PREFIX + "metadata_file_listing")
+ .defaultValue(false)
+ .markAdvanced()
+ .sinceVersion("0.14.0")
+ .withDocumentation("Makes athena use the metadata table to list
partitions and files. Currently it won't benefit from other features such stats
indexes");
}