jonvex commented on code in PR #12310:
URL: https://github.com/apache/hudi/pull/12310#discussion_r1861198936


##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1185,25 +1163,82 @@ public static HoodieData<HoodieRecord> 
convertMetadataToColumnStatsRecords(Hoodi
     }
   }
 
-  /**
-   * Get the list of columns for the table for column stats indexing
-   */
-  private static List<String> getColumnsToIndex(boolean 
isColumnStatsIndexEnabled,
-                                                List<String> 
targetColumnsForColumnStatsIndex,
-                                                Lazy<Option<Schema>> 
lazyWriterSchemaOpt) {
-    checkState(isColumnStatsIndexEnabled);
+  public static final String[] META_COLS_TO_ALWAYS_INDEX = 
{COMMIT_TIME_METADATA_FIELD, RECORD_KEY_METADATA_FIELD, 
PARTITION_PATH_METADATA_FIELD};
+  public static final Set<String> META_COL_SET_TO_INDEX = new 
HashSet<>(Arrays.asList(META_COLS_TO_ALWAYS_INDEX));
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.left(columnNames), overrideEnableCheck);
+
+  }
+  
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames) {
+    return getColumnsToIndex(tableConfig, metadataConfig, columnNames, false);
+  }
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               Lazy<Option<Schema>> 
tableSchema,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.right(tableSchema), overrideEnableCheck);
+  }
 
-    if (!targetColumnsForColumnStatsIndex.isEmpty()) {
-      return targetColumnsForColumnStatsIndex;
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               Lazy<Option<Schema>> 
tableSchema) {
+    return getColumnsToIndex(tableConfig, metadataConfig, tableSchema, false);
+  }
+
+  private static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                                HoodieMetadataConfig 
metadataConfig,
+                                                Either<List<String>, 
Lazy<Option<Schema>>> tableSchema,
+                                                boolean overrideEnableCheck) {
+    checkState(overrideEnableCheck || 
metadataConfig.isColumnStatsIndexEnabled());
+    Stream<String> columnsToIndexWithoutRequiredMetas = 
getColumnsToIndexWithoutRequiredMetas(metadataConfig, tableSchema);
+    if (!tableConfig.populateMetaFields()) {
+      return columnsToIndexWithoutRequiredMetas.collect(Collectors.toList());
     }
 
-    Option<Schema> writerSchemaOpt = lazyWriterSchemaOpt.get();
-    return writerSchemaOpt
-        .map(writerSchema ->
-            writerSchema.getFields().stream()
-                .map(Schema.Field::name)
-                .collect(Collectors.toList()))
-        .orElse(Collections.emptyList());
+    return Stream.concat(Arrays.stream(META_COLS_TO_ALWAYS_INDEX), 
columnsToIndexWithoutRequiredMetas).collect(Collectors.toList());
+  }
+
+  private static Stream<String> 
getColumnsToIndexWithoutRequiredMetas(HoodieMetadataConfig metadataConfig, 
Either<List<String>, Lazy<Option<Schema>>> tableSchema) {

Review Comment:
   Note that the result of `getColumnsToIndexWithoutRequiredMetas` can still contain some meta fields if the user explicitly included them.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to