This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 43b4852a1b1 [HUDI-6350] Allow athena to use the metadata table (#8929)
43b4852a1b1 is described below

commit 43b4852a1b1fdf5ef28acc44f1c5a1268ee6fd88
Author: Nicolas Paris <[email protected]>
AuthorDate: Fri Jun 16 04:18:44 2023 +0200

    [HUDI-6350] Allow athena to use the metadata table (#8929)
---
 .../org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java     | 10 ++++++++++
 .../org/apache/hudi/config/GlueCatalogSyncClientConfig.java    |  7 +++++++
 2 files changed, 17 insertions(+)

diff --git 
a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java 
b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
index 77b2f26956b..3af222f5fc3 100644
--- 
a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
+++ 
b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
@@ -72,6 +72,7 @@ import java.util.stream.Collectors;
 import static org.apache.hudi.aws.utils.S3Utils.s3aToS3;
 import static org.apache.hudi.common.util.MapUtils.containsAll;
 import static org.apache.hudi.common.util.MapUtils.isNullOrEmpty;
+import static 
org.apache.hudi.config.GlueCatalogSyncClientConfig.GLUE_METADATA_FILE_LISTING;
 import static 
org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_CREATE_MANAGED_TABLE;
 import static 
org.apache.hudi.hive.HiveSyncConfigHolder.HIVE_SUPPORT_TIMESTAMP_TYPE;
 import static org.apache.hudi.hive.util.HiveSchemaUtil.getPartitionKeyType;
@@ -91,16 +92,23 @@ public class AWSGlueCatalogSyncClient extends 
HoodieSyncClient {
   private static final Logger LOG = 
LoggerFactory.getLogger(AWSGlueCatalogSyncClient.class);
   private static final int MAX_PARTITIONS_PER_REQUEST = 100;
   private static final long BATCH_REQUEST_SLEEP_MILLIS = 1000L;
+  /**
+   * athena v2/v3 table property
+   * see https://docs.aws.amazon.com/athena/latest/ug/querying-hudi.html
+   */
+  private static final String ENABLE_MDT_LISTING = 
"hudi.metadata-listing-enabled";
   private final AWSGlue awsGlue;
   private final String databaseName;
 
   private final Boolean skipTableArchive;
+  private final String enableMetadataTable;
 
   public AWSGlueCatalogSyncClient(HiveSyncConfig config) {
     super(config);
     this.awsGlue = AWSGlueClientBuilder.standard().build();
     this.databaseName = config.getStringOrDefault(META_SYNC_DATABASE_NAME);
     this.skipTableArchive = 
config.getBooleanOrDefault(GlueCatalogSyncClientConfig.GLUE_SKIP_TABLE_ARCHIVE);
+    this.enableMetadataTable = 
Boolean.toString(config.getBoolean(GLUE_METADATA_FILE_LISTING)).toUpperCase();
   }
 
   @Override
@@ -233,6 +241,7 @@ public class AWSGlueCatalogSyncClient extends 
HoodieSyncClient {
   @Override
   public boolean updateTableProperties(String tableName, Map<String, String> 
tableProperties) {
     try {
+      tableProperties.put(ENABLE_MDT_LISTING, enableMetadataTable);
       return updateTableParameters(awsGlue, databaseName, tableName, 
tableProperties, skipTableArchive);
     } catch (Exception e) {
       throw new HoodieGlueSyncException("Fail to update properties for table " 
+ tableId(databaseName, tableName), e);
@@ -355,6 +364,7 @@ public class AWSGlueCatalogSyncClient extends 
HoodieSyncClient {
     if (!config.getBoolean(HIVE_CREATE_MANAGED_TABLE)) {
       params.put("EXTERNAL", "TRUE");
     }
+    params.put(ENABLE_MDT_LISTING, this.enableMetadataTable);
     params.putAll(tableProperties);
 
     try {
diff --git 
a/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java
 
b/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java
index 298d04944d9..efffae5bd89 100644
--- 
a/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java
+++ 
b/hudi-aws/src/main/java/org/apache/hudi/config/GlueCatalogSyncClientConfig.java
@@ -39,4 +39,11 @@ public class GlueCatalogSyncClientConfig extends 
HoodieConfig {
       .markAdvanced()
       .sinceVersion("0.14.0")
       .withDocumentation("Glue catalog sync based client will skip archiving 
the table version if this config is set to true");
+
+  public static final ConfigProperty<Boolean> GLUE_METADATA_FILE_LISTING = 
ConfigProperty
+      .key(GLUE_CLIENT_PROPERTY_PREFIX + "metadata_file_listing")
+      .defaultValue(false)
+      .markAdvanced()
+      .sinceVersion("0.14.0")
+      .withDocumentation("Makes athena use the metadata table to list 
partitions and files. Currently it won't benefit from other features such as stats 
indexes");
 }

Reply via email to