nsivabalan commented on code in PR #12310:
URL: https://github.com/apache/hudi/pull/12310#discussion_r1860041740


##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1185,25 +1163,82 @@ public static HoodieData<HoodieRecord> 
convertMetadataToColumnStatsRecords(Hoodi
     }
   }
 
-  /**
-   * Get the list of columns for the table for column stats indexing
-   */
-  private static List<String> getColumnsToIndex(boolean 
isColumnStatsIndexEnabled,
-                                                List<String> 
targetColumnsForColumnStatsIndex,
-                                                Lazy<Option<Schema>> 
lazyWriterSchemaOpt) {
-    checkState(isColumnStatsIndexEnabled);
+  public static final String[] META_COLS_TO_ALWAYS_INDEX = 
{COMMIT_TIME_METADATA_FIELD, RECORD_KEY_METADATA_FIELD, 
PARTITION_PATH_METADATA_FIELD};
+  public static final Set<String> META_COL_SET_TO_INDEX = new 
HashSet<>(Arrays.asList(META_COLS_TO_ALWAYS_INDEX));
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.left(columnNames), overrideEnableCheck);
+
+  }
+  
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames) {
+    return getColumnsToIndex(tableConfig, metadataConfig, columnNames, false);
+  }
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               Lazy<Option<Schema>> 
tableSchema,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.right(tableSchema), overrideEnableCheck);
+  }
 
-    if (!targetColumnsForColumnStatsIndex.isEmpty()) {
-      return targetColumnsForColumnStatsIndex;
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               Lazy<Option<Schema>> 
tableSchema) {
+    return getColumnsToIndex(tableConfig, metadataConfig, tableSchema, false);
+  }
+
+  private static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,

Review Comment:
   lets add java docs on the method



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1185,25 +1163,82 @@ public static HoodieData<HoodieRecord> 
convertMetadataToColumnStatsRecords(Hoodi
     }
   }
 
-  /**
-   * Get the list of columns for the table for column stats indexing
-   */
-  private static List<String> getColumnsToIndex(boolean 
isColumnStatsIndexEnabled,
-                                                List<String> 
targetColumnsForColumnStatsIndex,
-                                                Lazy<Option<Schema>> 
lazyWriterSchemaOpt) {
-    checkState(isColumnStatsIndexEnabled);
+  public static final String[] META_COLS_TO_ALWAYS_INDEX = 
{COMMIT_TIME_METADATA_FIELD, RECORD_KEY_METADATA_FIELD, 
PARTITION_PATH_METADATA_FIELD};
+  public static final Set<String> META_COL_SET_TO_INDEX = new 
HashSet<>(Arrays.asList(META_COLS_TO_ALWAYS_INDEX));
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.left(columnNames), overrideEnableCheck);
+
+  }
+  
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames) {
+    return getColumnsToIndex(tableConfig, metadataConfig, columnNames, false);
+  }
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               Lazy<Option<Schema>> 
tableSchema,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.right(tableSchema), overrideEnableCheck);
+  }
 
-    if (!targetColumnsForColumnStatsIndex.isEmpty()) {
-      return targetColumnsForColumnStatsIndex;
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,

Review Comment:
   do we have UTs directly against these getColumnsToIndex methods. I guess we 
have  3 to 4 diff methods here and we wanted to have tests for all of them 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1185,25 +1163,82 @@ public static HoodieData<HoodieRecord> 
convertMetadataToColumnStatsRecords(Hoodi
     }
   }
 
-  /**
-   * Get the list of columns for the table for column stats indexing
-   */
-  private static List<String> getColumnsToIndex(boolean 
isColumnStatsIndexEnabled,
-                                                List<String> 
targetColumnsForColumnStatsIndex,
-                                                Lazy<Option<Schema>> 
lazyWriterSchemaOpt) {
-    checkState(isColumnStatsIndexEnabled);
+  public static final String[] META_COLS_TO_ALWAYS_INDEX = 
{COMMIT_TIME_METADATA_FIELD, RECORD_KEY_METADATA_FIELD, 
PARTITION_PATH_METADATA_FIELD};
+  public static final Set<String> META_COL_SET_TO_INDEX = new 
HashSet<>(Arrays.asList(META_COLS_TO_ALWAYS_INDEX));
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.left(columnNames), overrideEnableCheck);
+
+  }
+  
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames) {
+    return getColumnsToIndex(tableConfig, metadataConfig, columnNames, false);
+  }
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,

Review Comment:
   same comment as above. lets ensure we have just the required access 
specifiers



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -2184,10 +2219,7 @@ public static HoodieData<HoodieRecord> 
convertFilesToPartitionStatsRecords(Hoodi
                                                                              
HoodieTableMetaClient dataTableMetaClient,
                                                                              
Option<Schema> writerSchemaOpt) {
     Lazy<Option<Schema>> lazyWriterSchemaOpt = writerSchemaOpt.isPresent() ? 
Lazy.eagerly(writerSchemaOpt) : Lazy.lazily(() -> 
tryResolveSchemaForTable(dataTableMetaClient));
-    final List<String> columnsToIndex = getColumnsToIndex(
-        metadataConfig.isPartitionStatsIndexEnabled(),
-        metadataConfig.getColumnsEnabledForColumnStatsIndex(),
-        lazyWriterSchemaOpt);
+    final List<String> columnsToIndex = 
getColumnsToIndex(dataTableMetaClient.getTableConfig(), metadataConfig, 
lazyWriterSchemaOpt, metadataConfig.isPartitionStatsIndexEnabled());

Review Comment:
   whats the relation b/w partitions stats and the overrideEnableCheck? 
   



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1185,25 +1163,82 @@ public static HoodieData<HoodieRecord> 
convertMetadataToColumnStatsRecords(Hoodi
     }
   }
 
-  /**
-   * Get the list of columns for the table for column stats indexing
-   */
-  private static List<String> getColumnsToIndex(boolean 
isColumnStatsIndexEnabled,
-                                                List<String> 
targetColumnsForColumnStatsIndex,
-                                                Lazy<Option<Schema>> 
lazyWriterSchemaOpt) {
-    checkState(isColumnStatsIndexEnabled);
+  public static final String[] META_COLS_TO_ALWAYS_INDEX = 
{COMMIT_TIME_METADATA_FIELD, RECORD_KEY_METADATA_FIELD, 
PARTITION_PATH_METADATA_FIELD};
+  public static final Set<String> META_COL_SET_TO_INDEX = new 
HashSet<>(Arrays.asList(META_COLS_TO_ALWAYS_INDEX));
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.left(columnNames), overrideEnableCheck);
+
+  }
+  
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames) {
+    return getColumnsToIndex(tableConfig, metadataConfig, columnNames, false);
+  }
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               Lazy<Option<Schema>> 
tableSchema,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.right(tableSchema), overrideEnableCheck);
+  }
 
-    if (!targetColumnsForColumnStatsIndex.isEmpty()) {
-      return targetColumnsForColumnStatsIndex;
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               Lazy<Option<Schema>> 
tableSchema) {
+    return getColumnsToIndex(tableConfig, metadataConfig, tableSchema, false);
+  }
+
+  private static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                                HoodieMetadataConfig 
metadataConfig,
+                                                Either<List<String>, 
Lazy<Option<Schema>>> tableSchema,
+                                                boolean overrideEnableCheck) {
+    checkState(overrideEnableCheck || 
metadataConfig.isColumnStatsIndexEnabled());
+    Stream<String> columnsToIndexWithoutRequiredMetas = 
getColumnsToIndexWithoutRequiredMetas(metadataConfig, tableSchema);
+    if (!tableConfig.populateMetaFields()) {
+      return columnsToIndexWithoutRequiredMetas.collect(Collectors.toList());
     }
 
-    Option<Schema> writerSchemaOpt = lazyWriterSchemaOpt.get();
-    return writerSchemaOpt
-        .map(writerSchema ->
-            writerSchema.getFields().stream()
-                .map(Schema.Field::name)
-                .collect(Collectors.toList()))
-        .orElse(Collections.emptyList());
+    return Stream.concat(Arrays.stream(META_COLS_TO_ALWAYS_INDEX), 
columnsToIndexWithoutRequiredMetas).collect(Collectors.toList());
+  }
+
+  private static Stream<String> 
getColumnsToIndexWithoutRequiredMetas(HoodieMetadataConfig metadataConfig, 
Either<List<String>, Lazy<Option<Schema>>> tableSchema) {

Review Comment:
   rename to getColumnsToIndexWithoutMetaFields



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -2239,6 +2271,16 @@ private static 
List<HoodieColumnRangeMetadata<Comparable>> getFileStatsRangeMeta
     return readColumnRangeMetadataFrom(partitionPath, fileName, 
datasetMetaClient, columnsToIndex, maxBufferSize);
   }
 
+  public static Option<Schema> 
getTableSchemaFromCommitMetadata(HoodieCommitMetadata commitMetadata, 
HoodieTableConfig tableConfig) {

Review Comment:
   again, lets avoid public methods. can you make it package private 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1185,25 +1163,82 @@ public static HoodieData<HoodieRecord> 
convertMetadataToColumnStatsRecords(Hoodi
     }
   }
 
-  /**
-   * Get the list of columns for the table for column stats indexing
-   */
-  private static List<String> getColumnsToIndex(boolean 
isColumnStatsIndexEnabled,
-                                                List<String> 
targetColumnsForColumnStatsIndex,
-                                                Lazy<Option<Schema>> 
lazyWriterSchemaOpt) {
-    checkState(isColumnStatsIndexEnabled);
+  public static final String[] META_COLS_TO_ALWAYS_INDEX = 
{COMMIT_TIME_METADATA_FIELD, RECORD_KEY_METADATA_FIELD, 
PARTITION_PATH_METADATA_FIELD};
+  public static final Set<String> META_COL_SET_TO_INDEX = new 
HashSet<>(Arrays.asList(META_COLS_TO_ALWAYS_INDEX));
+
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,

Review Comment:
   May be most of existing methods does not follow strict access specifiers. 
   can you make this package private. if you wanna do UT, you can add visible 
for testing annotation. 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1166,22 +1166,31 @@ public static HoodieData<HoodieRecord> 
convertMetadataToColumnStatsRecords(Hoodi
   public static final String[] META_COLS_TO_ALWAYS_INDEX = 
{COMMIT_TIME_METADATA_FIELD, RECORD_KEY_METADATA_FIELD, 
PARTITION_PATH_METADATA_FIELD};
   public static final Set<String> META_COL_SET_TO_INDEX = new 
HashSet<>(Arrays.asList(META_COLS_TO_ALWAYS_INDEX));
 
+  public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
+                                               HoodieMetadataConfig 
metadataConfig,
+                                               List<String> columnNames,
+                                               boolean overrideEnableCheck) {
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.left(columnNames), overrideEnableCheck);
+
+  }
+  
   public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
                                                HoodieMetadataConfig 
metadataConfig,
                                                List<String> columnNames) {
-    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.left(columnNames));
+    return getColumnsToIndex(tableConfig, metadataConfig, columnNames, false);
   }
 
   public static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
                                                HoodieMetadataConfig 
metadataConfig,
                                                Lazy<Option<Schema>> 
tableSchema) {
-    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.right(tableSchema));
+    return getColumnsToIndex(tableConfig, metadataConfig, 
Either.right(tableSchema), false);
   }
 
   private static List<String> getColumnsToIndex(HoodieTableConfig tableConfig,
                                                 HoodieMetadataConfig 
metadataConfig,
-                                                Either<List<String>, 
Lazy<Option<Schema>>> tableSchema) {
-    checkState(metadataConfig.isColumnStatsIndexEnabled());
+                                                Either<List<String>, 
Lazy<Option<Schema>>> tableSchema,
+                                                boolean overrideEnableCheck) {
+    checkState(overrideEnableCheck || 
metadataConfig.isColumnStatsIndexEnabled());

Review Comment:
   @jonvex : did you follow on this? 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1203,7 +1212,21 @@ private static Stream<String> 
getColumnsToIndexWithoutRequiredMetas(HoodieMetada
   }
 
   private static Stream<String> getFirstNFieldNames(Schema tableSchema, int n) 
{
-    return 
getFirstNFieldNames(tableSchema.getFields().stream().map(Schema.Field::name), 
n);
+    return getFirstNFieldNames(tableSchema.getFields().stream()
+        .filter(field -> {
+          switch (resolveNullableSchema(field.schema()).getType()) {

Review Comment:
   Can we just re-use the same method. may be we can rename the method to, 
isDataTypeSupportedForColStats(Schema schema, HoodieRecordType recordType) 
   and use it wherever required. 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -2239,6 +2271,16 @@ private static 
List<HoodieColumnRangeMetadata<Comparable>> getFileStatsRangeMeta
     return readColumnRangeMetadataFrom(partitionPath, fileName, 
datasetMetaClient, columnsToIndex, maxBufferSize);
   }
 
+  public static Option<Schema> 
getTableSchemaFromCommitMetadata(HoodieCommitMetadata commitMetadata, 
HoodieTableConfig tableConfig) {

Review Comment:
   do we have UTs for this



##########
hudi-hadoop-common/src/test/java/org/apache/hudi/metadata/TestHoodieTableMetadataUtil.java:
##########
@@ -355,4 +358,182 @@ public void testValidateDataTypeForSecondaryIndex() {
     // Test for complex fields
     assertFalse(validateDataTypeForSecondaryIndex(Arrays.asList("arrayField", 
"mapField", "structField"), schema));
   }
+
+  @Test
+  public void testGetColumnsToIndex() {
+    HoodieTableConfig tableConfig = metaClient.getTableConfig();
+
+    //test default
+    HoodieMetadataConfig metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        .build();
+    List<String> colNames = new ArrayList<>();
+    addNColumns(colNames, 
HoodieMetadataConfig.COLUMN_STATS_INDEX_MAX_COLUMNS.defaultValue() + 10);
+    List<String> expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    addNColumns(expected, 
HoodieMetadataConfig.COLUMN_STATS_INDEX_MAX_COLUMNS.defaultValue());
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
colNames));
+
+    //test with table schema < default
+    int tableSchemaSize = 
HoodieMetadataConfig.COLUMN_STATS_INDEX_MAX_COLUMNS.defaultValue() - 10;
+    assertTrue(tableSchemaSize > 0);
+    colNames = new ArrayList<>();
+    addNColumns(colNames, tableSchemaSize);
+    expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    addNColumns(expected, tableSchemaSize);
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
colNames));
+
+    //test with max val < tableSchema
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        .withMaxColumnsToIndexForColStats(3)
+        .build();
+    colNames = new ArrayList<>();
+    addNColumns(colNames, 
HoodieMetadataConfig.COLUMN_STATS_INDEX_MAX_COLUMNS.defaultValue() + 10);
+    expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    addNColumns(expected, 3);
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
colNames));
+
+    //test with max val > tableSchema
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        
.withMaxColumnsToIndexForColStats(HoodieMetadataConfig.COLUMN_STATS_INDEX_MAX_COLUMNS.defaultValue()
 + 10)
+        .build();
+    colNames = new ArrayList<>();
+    addNColumns(colNames, tableSchemaSize);
+    expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    addNColumns(expected, tableSchemaSize);
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
colNames));
+
+    //test with list of cols
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        .withColumnStatsIndexForColumns("col_1,col_7,col_11")
+        .build();
+    colNames = new ArrayList<>();
+    addNColumns(colNames, 15);
+    expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    expected.add("col_1");
+    expected.add("col_7");
+    expected.add("col_11");
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
colNames));
+
+    //test with list of cols longer than config
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        .withMaxColumnsToIndexForColStats(1)
+        .withColumnStatsIndexForColumns("col_1,col_7,col_11")
+        .build();
+    colNames = new ArrayList<>();
+    addNColumns(colNames, 15);
+    expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    expected.add("col_1");
+    expected.add("col_7");
+    expected.add("col_11");
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
colNames));
+
+    //test with list of cols including meta cols than config
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        
.withColumnStatsIndexForColumns("col_1,col_7,_hoodie_commit_time,col_11,_hoodie_commit_seqno")
+        .build();
+    colNames = new ArrayList<>();
+    addNColumns(colNames, 15);
+    expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    expected.add("col_1");
+    expected.add("col_7");
+    expected.add("col_11");
+    expected.add("_hoodie_commit_seqno");
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
colNames));
+
+    //test with avro schema
+    Schema schema = new Schema.Parser().parse(SCHEMA_WITH_AVRO_TYPES);
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        
.withColumnStatsIndexForColumns("booleanField,decimalField,localTimestampMillisField")
+        .build();
+    expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    expected.add("booleanField");
+    expected.add("decimalField");
+    expected.add("localTimestampMillisField");
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
Lazy.eagerly(Option.of(schema))));
+
+    //test with avro schema with max cols set
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        .withMaxColumnsToIndexForColStats(2)
+        .build();
+    expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    expected.add("booleanField");
+    expected.add("intField");
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
Lazy.eagerly(Option.of(schema))));
+    //test with avro schema with meta cols
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
Lazy.eagerly(Option.of(HoodieAvroUtils.addMetadataFields(schema)))));
+
+    //test with avro schema with type filter
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        .withMaxColumnsToIndexForColStats(100)
+        .build();
+    expected = new 
ArrayList<>(Arrays.asList(HoodieTableMetadataUtil.META_COLS_TO_ALWAYS_INDEX));
+    expected.add("timestamp");
+    expected.add("_row_key");
+    expected.add("partition_path");
+    expected.add("rider");
+    expected.add("driver");
+    expected.add("begin_lat");
+    expected.add("begin_lon");
+    expected.add("end_lat");
+    expected.add("end_lon");
+    expected.add("distance_in_meters");
+    expected.add("seconds_since_epoch");
+    expected.add("weight");
+    expected.add("nation");
+    expected.add("current_date");
+    expected.add("current_ts");
+    expected.add("_hoodie_is_deleted");
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
Lazy.eagerly(Option.of(HoodieTestDataGenerator.AVRO_SCHEMA))));
+    //test with avro schema with meta cols
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
Lazy.eagerly(Option.of(HoodieAvroUtils.addMetadataFields(HoodieTestDataGenerator.AVRO_SCHEMA)))));
+
+    //test with meta cols disabled
+    tableConfig.setValue(HoodieTableConfig.POPULATE_META_FIELDS.key(), 
"false");
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        .build();
+    colNames = new ArrayList<>();
+    addNColumns(colNames, tableSchemaSize);
+    expected = new ArrayList<>();
+    addNColumns(expected, tableSchemaSize);
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
colNames));
+
+    //test with meta cols disabled with col list
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        .withColumnStatsIndexForColumns("col_1,col_7,col_11")
+        .build();
+    colNames = new ArrayList<>();
+    addNColumns(colNames, 15);
+    expected = new ArrayList<>();
+    expected.add("col_1");
+    expected.add("col_7");
+    expected.add("col_11");
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
colNames));
+
+    //test with meta cols disabled with avro schema
+    metadataConfig = HoodieMetadataConfig.newBuilder()
+        .enable(true).withMetadataIndexColumnStats(true)
+        
.withColumnStatsIndexForColumns("booleanField,decimalField,localTimestampMillisField")
+        .build();
+    expected = new ArrayList<>();
+    expected.add("booleanField");
+    expected.add("decimalField");
+    expected.add("localTimestampMillisField");
+    assertEquals(expected, 
HoodieTableMetadataUtil.getColumnsToIndex(tableConfig, metadataConfig, 
Lazy.eagerly(Option.of(schema))));
+  }

Review Comment:
   looks like we have covered tests here for getColumnsToIndex api. 👍 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to