suryaprasanna commented on code in PR #18384:
URL: https://github.com/apache/hudi/pull/18384#discussion_r2997454239


##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java:
##########
@@ -1738,6 +1741,43 @@ public boolean populateMetaFields() {
     return getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS);
   }
 
+  public Set<String> getMetaFieldsToExclude() {
+    String value = getString(HoodieTableConfig.META_FIELDS_TO_EXCLUDE);
+    if (value == null || value.trim().isEmpty()) {
+      return Collections.emptySet();
+    }
+    Set<String> excluded = new HashSet<>();
+    for (String field : value.split(",")) {
+      String trimmed = field.trim();
+      if (!trimmed.isEmpty()) {
+        excluded.add(trimmed);
+      }
+    }
+    return excluded;
+  }
+
+  /**
+   * Returns a boolean[5] indexed by meta field ordinal.
+   * true = populate this meta field, false = write empty string.
+   */
+  public boolean[] getMetaFieldPopulationFlags() {
+    boolean[] flags = new boolean[5];
+    if (!populateMetaFields()) {
+      return flags; // all false
+    }
+    Set<String> excluded = getMetaFieldsToExclude();
+    if (excluded.isEmpty()) {
+      Arrays.fill(flags, true);
+      return flags;
+    }
+    flags[0] = !excluded.contains(HoodieRecord.COMMIT_TIME_METADATA_FIELD);
+    flags[1] = !excluded.contains(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD);
+    flags[2] = !excluded.contains(HoodieRecord.RECORD_KEY_METADATA_FIELD);
+    flags[3] = !excluded.contains(HoodieRecord.PARTITION_PATH_METADATA_FIELD);
+    flags[4] = !excluded.contains(HoodieRecord.FILENAME_METADATA_FIELD);
+    return flags;

Review Comment:
   Do we need to include OPERATION_METADATA_FIELD as well?



##########
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowDataCreateHandle.java:
##########
@@ -77,6 +77,7 @@ public class HoodieRowDataCreateHandle implements 
Serializable {
   private final String fileId;
   private final boolean preserveHoodieMetadata;
   private final boolean skipMetadataWrite;
+  private final boolean[] populateField;

Review Comment:
   Should this be named something like `populateIndividualMetaFields` instead of 
`populateField`? What do you think?



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java:
##########
@@ -1738,6 +1741,43 @@ public boolean populateMetaFields() {
     return getBooleanOrDefault(HoodieTableConfig.POPULATE_META_FIELDS);
   }
 
+  public Set<String> getMetaFieldsToExclude() {

Review Comment:
   Should this be private?



##########
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetWriter.java:
##########
@@ -95,11 +108,28 @@ protected void updateRecordMetadata(InternalRow row,
                                       UTF8String recordKey,
                                       String partitionPath,
                                       long recordCount)  {
-    row.update(COMMIT_TIME_METADATA_FIELD.ordinal(), instantTime);
-    row.update(COMMIT_SEQNO_METADATA_FIELD.ordinal(), 
UTF8String.fromString(seqIdGenerator.apply(recordCount)));
-    row.update(RECORD_KEY_METADATA_FIELD.ordinal(), recordKey);
-    // TODO set partition path in ctor
-    row.update(PARTITION_PATH_METADATA_FIELD.ordinal(), 
UTF8String.fromString(partitionPath));
-    row.update(FILENAME_METADATA_FIELD.ordinal(), fileName);
+    if (populateField != null) {
+      if (populateField[0]) {
+        row.update(COMMIT_TIME_METADATA_FIELD.ordinal(), instantTime);
+      }
+      if (populateField[1]) {
+        row.update(COMMIT_SEQNO_METADATA_FIELD.ordinal(), 
UTF8String.fromString(seqIdGenerator.apply(recordCount)));
+      }
+      if (populateField[2]) {
+        row.update(RECORD_KEY_METADATA_FIELD.ordinal(), recordKey);
+      }
+      if (populateField[3]) {
+        row.update(PARTITION_PATH_METADATA_FIELD.ordinal(), 
UTF8String.fromString(partitionPath));
+      }
+      if (populateField[4]) {
+        row.update(FILENAME_METADATA_FIELD.ordinal(), fileName);
+      }
+    } else {
+      row.update(COMMIT_TIME_METADATA_FIELD.ordinal(), instantTime);
+      row.update(COMMIT_SEQNO_METADATA_FIELD.ordinal(), 
UTF8String.fromString(seqIdGenerator.apply(recordCount)));
+      row.update(RECORD_KEY_METADATA_FIELD.ordinal(), recordKey);
+      row.update(PARTITION_PATH_METADATA_FIELD.ordinal(), 
UTF8String.fromString(partitionPath));
+      row.update(FILENAME_METADATA_FIELD.ordinal(), fileName);

Review Comment:
   Do we need to include OPERATION_METADATA_FIELD as well?



##########
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableConfig.java:
##########
@@ -324,6 +324,16 @@ public static final String getDefaultPayloadClassName() {
       .withDocumentation("When enabled, populates all meta fields. When 
disabled, no meta fields are populated "
           + "and incremental queries will not be functional. This is only 
meant to be used for append only/immutable data for batch processing");
 
+  public static final ConfigProperty<String> META_FIELDS_TO_EXCLUDE = 
ConfigProperty

Review Comment:
   This would require a table version upgrade; I'm not sure how we want to track it 
as part of the next table version.



##########
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetWriter.java:
##########
@@ -44,6 +44,7 @@ public class HoodieSparkParquetWriter extends 
HoodieBaseParquetWriter<InternalRo
   private final UTF8String instantTime;
 
   private final boolean populateMetaFields;
+  private final boolean[] populateField;

Review Comment:
   Should this be named something like `populateIndividualMetaFields` instead of 
`populateField`? What do you think?



##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala:
##########
@@ -110,11 +110,13 @@ object HoodieDatasetBulkInsertHelper
                 sparkKeyGenerator
               }
 
+          val populateField = config.getMetaFieldPopulationFlags

Review Comment:
   Should this be named something like `populateIndividualMetaFields` instead of 
`populateField`? What do you think?



##########
hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroOrcWriter.java:
##########
@@ -70,11 +70,17 @@ public class HoodieAvroOrcWriter implements 
HoodieAvroFileWriter, Closeable {
   private final TaskContextSupplier taskContextSupplier;
 
   private final HoodieOrcConfig orcConfig;
+  private final boolean[] populateField;
   private String minRecordKey;
   private String maxRecordKey;
 
   public HoodieAvroOrcWriter(String instantTime, StoragePath file, 
HoodieOrcConfig config, HoodieSchema schema,
                              TaskContextSupplier taskContextSupplier) throws 
IOException {
+    this(instantTime, file, config, schema, taskContextSupplier, null);
+  }
+
+  public HoodieAvroOrcWriter(String instantTime, StoragePath file, 
HoodieOrcConfig config, HoodieSchema schema,
+                             TaskContextSupplier taskContextSupplier, 
boolean[] populateField) throws IOException {

Review Comment:
   Should this be named something like `populateIndividualMetaFields` instead of 
`populateField`? What do you think?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to