nsivabalan commented on code in PR #12200:
URL: https://github.com/apache/hudi/pull/12200#discussion_r1828620724


##########
hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java:
##########
@@ -83,7 +83,7 @@ public final class HoodieMetadataConfig extends HoodieConfig {
   // Maximum delta commits before compaction occurs
   public static final ConfigProperty<Integer> COMPACT_NUM_DELTA_COMMITS = 
ConfigProperty
       .key(METADATA_PREFIX + ".compact.max.delta.commits")
-      .defaultValue(10)
+      .defaultValue(20)

Review Comment:
   Why are these changes required?



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieMergeHandle.java:
##########
@@ -350,10 +353,12 @@ public void write(HoodieRecord<T> oldRecord) {
     boolean copyOldRecord = true;
     String key = oldRecord.getRecordKey(oldSchema, keyGeneratorOpt);
     TypedProperties props = config.getPayloadConfig().getProps();
-    if (keyToNewRecords.containsKey(key)) {
+    // TODO: change on the basis of whether merging secondary indexes or not
+    HoodieMergeKey mergeKey = new HoodieSimpleMergeKey(new HoodieKey(key, 
partitionPath));

Review Comment:
   HoodieRecord does contain getMergeKey, right?



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java:
##########
@@ -988,20 +1001,37 @@ private Map<String, 
List<HoodieRecord<HoodieMetadataPayload>>> readNonUniqueReco
     }
 
     HoodieTimer readTimer = HoodieTimer.start();
+    // baseFileRecordsMap: secondary key -> List of records in secondary index 
base file (possible that it is not compacted)
     Map<String, List<HoodieRecord<HoodieMetadataPayload>>> baseFileRecordsMap =
         fetchBaseFileAllRecordsByKeys(reader, sortedKeys, true, partitionName);
+    // logRecordsMap: secondaryKey -> Map[recordKey -> HoodieRecord] for the 
given secondary key
     if (logRecordsMap.isEmpty() && !baseFileRecordsMap.isEmpty()) {
       // file slice has only base file
       timings.add(timer.endTimer());
       if (!deleteRecordKeysFromLogs.isEmpty()) { // remove deleted records 
from log from base file record list
+        // TODO: baseFileRecordsMap could have multiple records for a single 
secondary key. Remove only the one corresponding to mergeKey.
         deleteRecordKeysFromLogs.forEach(key -> 
baseFileRecordsMap.remove(key));
       }
       return baseFileRecordsMap;
     }
 
     // check why we are not considering records missing from logs, but only 
from base file.
     logRecordsMap.forEach((secondaryKey, logRecords) -> {
-      if (!baseFileRecordsMap.containsKey(secondaryKey)) {
+      // Handle the dummy case first
+      if (secondaryKey.equals("dummySecondaryKey")) {

Review Comment:
   oh man :( 



##########
hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java:
##########
@@ -98,7 +98,7 @@ public final class HoodieMetadataConfig extends HoodieConfig {
   // Log blocks threshold, after a file slice crosses this threshold log 
compact operation is scheduled.
   public static final ConfigProperty<Integer> LOG_COMPACT_BLOCKS_THRESHOLD = 
ConfigProperty
       .key(METADATA_PREFIX + ".log.compaction.blocks.threshold")
-      .defaultValue(5)
+      .defaultValue(10)

Review Comment:
   Same here — why is this default change required?



##########
hudi-common/src/main/java/org/apache/hudi/common/model/DeleteRecord.java:
##########
@@ -51,25 +53,28 @@ public class DeleteRecord implements Serializable {
    */
   private final Comparable<?> orderingVal;
 
-  private DeleteRecord(HoodieKey hoodieKey, Comparable orderingVal) {
+  private final HoodieMergeKey mergeKey;
+
+  private DeleteRecord(HoodieKey hoodieKey, Comparable orderingVal, 
Option<HoodieMergeKey> mergeKeyOpt) {

Review Comment:
   this does not look elegant :( 



##########
hudi-common/src/main/java/org/apache/hudi/common/model/HoodieRecord.java:
##########
@@ -204,6 +213,14 @@ public HoodieKey getKey() {
     return key;
   }
 
+  public void setMergeKey(HoodieMergeKey mergeKey) {

Review Comment:
   I don't feel good about this change either :(



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -2057,15 +2073,42 @@ public HoodieRecord next() {
         HoodieRecord record = fileSliceIterator.next();
         String recordKey = record.getRecordKey(tableSchema, 
HoodieRecord.RECORD_KEY_METADATA_FIELD);
         String secondaryKeyFields = String.join(".", 
indexDefinition.getSourceFields());
-        String secondaryKey;
+        String secondaryKey = null;
         try {
-          GenericRecord genericRecord = (GenericRecord) 
(record.toIndexedRecord(tableSchema, 
CollectionUtils.emptyProps()).get()).getData();
-          secondaryKey = 
HoodieAvroUtils.getNestedFieldValAsString(genericRecord, secondaryKeyFields, 
true, false);
+          if (record.toIndexedRecord(tableSchema, 
CollectionUtils.emptyProps()).isPresent()) {
+            GenericRecord genericRecord = (GenericRecord) 
(record.toIndexedRecord(tableSchema, 
CollectionUtils.emptyProps()).get()).getData();
+            secondaryKey = 
HoodieAvroUtils.getNestedFieldValAsString(genericRecord, secondaryKeyFields, 
true, false);
+          } else {
+            System.out.println("asdfasd");
+          }
         } catch (IOException e) {
           throw new RuntimeException("Failed to fetch records." + e);
         }
+        if (secondaryKey == null) {
+          // If secondary key is null, then it means data is being deleted. 
Check in unmerged records.
+          /*for (HoodieRecord unmergedRecord : records) {
+            if (unmergedRecord.getRecordKey(tableSchema, 
HoodieRecord.RECORD_KEY_METADATA_FIELD).equals(recordKey)) {
+              secondaryKey = getSecondaryKey(unmergedRecord);
+              break;
+            }
+          }*/
+          return HoodieMetadataPayload.createSecondaryIndex(recordKey, 
"dummySecondaryKey", indexDefinition.getIndexName(), false);

Review Comment:
   so many non-standard and hacky changes :( 



##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java:
##########
@@ -988,20 +1001,37 @@ private Map<String, 
List<HoodieRecord<HoodieMetadataPayload>>> readNonUniqueReco
     }
 
     HoodieTimer readTimer = HoodieTimer.start();
+    // baseFileRecordsMap: secondary key -> List of records in secondary index 
base file (possible that it is not compacted)
     Map<String, List<HoodieRecord<HoodieMetadataPayload>>> baseFileRecordsMap =
         fetchBaseFileAllRecordsByKeys(reader, sortedKeys, true, partitionName);
+    // logRecordsMap: secondaryKey -> Map[recordKey -> HoodieRecord] for the 
given secondary key

Review Comment:
   Yeah, all of these look non-standard — we should not have to do this just for the secondary
index.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to