vinothchandar commented on code in PR #13347:
URL: https://github.com/apache/hudi/pull/13347#discussion_r2114857478


##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SimpleConcurrentFileWritesConflictResolutionStrategy.java:
##########
@@ -53,12 +55,59 @@ public class 
SimpleConcurrentFileWritesConflictResolutionStrategy
   @Override
   public Stream<HoodieInstant> getCandidateInstants(HoodieTableMetaClient 
metaClient, HoodieInstant currentInstant,
                                                     Option<HoodieInstant> 
lastSuccessfulInstant) {
+    if 
(metaClient.getTableConfig().getTableVersion().greaterThanOrEquals(HoodieTableVersion.EIGHT))
 {
+      return getCandidateInstantsV8AndAbove(metaClient, currentInstant, 
lastSuccessfulInstant);
+    } else {
+      return getCandidateInstantsPreV8(metaClient, currentInstant, 
lastSuccessfulInstant);
+    }
+  }
+
+  /**
+   * To find which instants are conflicting for table versions 8 and above, we 
apply the following logic:
+   * <ul>
+   *   <li>Get completed instants timeline only for commits that have happened 
since the last successful write.</li>
+   *   <li>Get any completed replace commit that happened since the last 
successful write and any pending replace commit.</li>
+   * </ul>
+   * @param metaClient table meta client
+   * @param currentInstant the instant for the write this client is attempting 
to commit
+   * @param lastSuccessfulInstant the last successful write before this commit 
started
+   * @return a stream of instants that are candidates for conflict resolution
+   */
+  private Stream<HoodieInstant> 
getCandidateInstantsV8AndAbove(HoodieTableMetaClient metaClient, HoodieInstant 
currentInstant,
+                                                               
Option<HoodieInstant> lastSuccessfulInstant) {
+    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
+    boolean isMoRTable = metaClient.getTableType() == 
HoodieTableType.MERGE_ON_READ;
+    Stream<HoodieInstant> completedCommitsInstantStream = activeTimeline
+        .getCommitsTimeline()
+        .filterCompletedInstants()
+        .filter(instant -> !isMoRTable || 
!instant.getAction().equals(HoodieTimeline.COMMIT_ACTION))
+        .findInstantsAfter(lastSuccessfulInstant.isPresent() ? 
lastSuccessfulInstant.get().requestedTime() : HoodieTimeline.INIT_INSTANT_TS)
+        .getInstantsAsStream();
+
+    Stream<HoodieInstant> clusteringAndReplaceCommitInstants = activeTimeline

Review Comment:
   Let's pull this into a private method that is called from both places?



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SimpleConcurrentFileWritesConflictResolutionStrategy.java:
##########
@@ -53,12 +55,59 @@ public class 
SimpleConcurrentFileWritesConflictResolutionStrategy
   @Override
   public Stream<HoodieInstant> getCandidateInstants(HoodieTableMetaClient 
metaClient, HoodieInstant currentInstant,
                                                     Option<HoodieInstant> 
lastSuccessfulInstant) {
+    if 
(metaClient.getTableConfig().getTableVersion().greaterThanOrEquals(HoodieTableVersion.EIGHT))
 {
+      return getCandidateInstantsV8AndAbove(metaClient, currentInstant, 
lastSuccessfulInstant);
+    } else {
+      return getCandidateInstantsPreV8(metaClient, currentInstant, 
lastSuccessfulInstant);
+    }
+  }
+
+  /**
+   * To find which instants are conflicting for table versions 8 and above, we 
apply the following logic:
+   * <ul>
+   *   <li>Get completed instants timeline only for commits that have happened 
since the last successful write.</li>
+   *   <li>Get any completed replace commit that happened since the last 
successful write and any pending replace commit.</li>
+   * </ul>
+   * @param metaClient table meta client
+   * @param currentInstant the instant for the write this client is attempting 
to commit
+   * @param lastSuccessfulInstant the last successful write before this commit 
started
+   * @return a stream of instants that are candidates for conflict resolution
+   */
+  private Stream<HoodieInstant> 
getCandidateInstantsV8AndAbove(HoodieTableMetaClient metaClient, HoodieInstant 
currentInstant,
+                                                               
Option<HoodieInstant> lastSuccessfulInstant) {
+    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
+    boolean isMoRTable = metaClient.getTableType() == 
HoodieTableType.MERGE_ON_READ;
+    Stream<HoodieInstant> completedCommitsInstantStream = activeTimeline
+        .getCommitsTimeline()
+        .filterCompletedInstants()
+        .filter(instant -> !isMoRTable || 
!instant.getAction().equals(HoodieTimeline.COMMIT_ACTION))
+        .findInstantsAfter(lastSuccessfulInstant.isPresent() ? 
lastSuccessfulInstant.get().requestedTime() : HoodieTimeline.INIT_INSTANT_TS)

Review Comment:
   Let's use `lastSuccessfulInstant.map().orElse()`?



##########
hudi-client/hudi-client-common/src/test/java/org/apache/hudi/client/transaction/TestSimpleConcurrentFileWritesConflictResolutionStrategy.java:
##########
@@ -224,16 +228,23 @@ public void 
testConcurrentWritesWithInterleavingScheduledCompaction() throws Exc
     metaClient.reloadActiveTimeline();
     List<HoodieInstant> candidateInstants = 
strategy.getCandidateInstants(metaClient, currentInstant.get(), 
lastSuccessfulInstant).collect(
         Collectors.toList());
-    // writer 1 conflicts with scheduled compaction plan 1
-    Assertions.assertEquals(1, candidateInstants.size());
-    ConcurrentOperation thatCommitOperation = new 
ConcurrentOperation(candidateInstants.get(0), metaClient);
-    ConcurrentOperation thisCommitOperation = new 
ConcurrentOperation(currentInstant.get(), currentMetadata);
-    Assertions.assertTrue(strategy.hasConflict(thisCommitOperation, 
thatCommitOperation));
-    Assertions.assertThrows(HoodieWriteConflictException.class, () -> 
strategy.resolveConflict(null, thisCommitOperation, thatCommitOperation));
+    if (preTableVersion8) {
+      // writer 1 conflicts with scheduled compaction plan 1
+      Assertions.assertEquals(1, candidateInstants.size());
+      ConcurrentOperation thatCommitOperation = new 
ConcurrentOperation(candidateInstants.get(0), metaClient);
+      ConcurrentOperation thisCommitOperation = new 
ConcurrentOperation(currentInstant.get(), currentMetadata);
+      Assertions.assertTrue(strategy.hasConflict(thisCommitOperation, 
thatCommitOperation));
+      Assertions.assertThrows(HoodieWriteConflictException.class, () -> 
strategy.resolveConflict(null, thisCommitOperation, thatCommitOperation));
+    } else {
+      // writer will not have conflicts with compaction since ordering is now 
based on completion time to avoid these conflicts
+      Assertions.assertTrue(candidateInstants.isEmpty());
+    }
   }
 
-  @Test
-  public void testConcurrentWritesWithInterleavingSuccessfulCompaction() 
throws Exception {
+  @ParameterizedTest
+  @ValueSource(booleans = {false, true})
+  public void testConcurrentWritesWithInterleavingSuccessfulCompaction(boolean 
preTableVersion8) throws Exception {
+    initMetaClient(preTableVersion8, HoodieTableType.MERGE_ON_READ);

Review Comment:
   I guess there are only two scenarios that are different, so parameterizing 
the entire test class is overkill.



##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/SimpleConcurrentFileWritesConflictResolutionStrategy.java:
##########
@@ -53,12 +55,59 @@ public class 
SimpleConcurrentFileWritesConflictResolutionStrategy
   @Override
   public Stream<HoodieInstant> getCandidateInstants(HoodieTableMetaClient 
metaClient, HoodieInstant currentInstant,
                                                     Option<HoodieInstant> 
lastSuccessfulInstant) {
+    if 
(metaClient.getTableConfig().getTableVersion().greaterThanOrEquals(HoodieTableVersion.EIGHT))
 {
+      return getCandidateInstantsV8AndAbove(metaClient, currentInstant, 
lastSuccessfulInstant);
+    } else {
+      return getCandidateInstantsPreV8(metaClient, currentInstant, 
lastSuccessfulInstant);
+    }
+  }
+
+  /**
+   * To find which instants are conflicting for table versions 8 and above, we 
apply the following logic:
+   * <ul>
+   *   <li>Get completed instants timeline only for commits that have happened 
since the last successful write.</li>
+   *   <li>Get any completed replace commit that happened since the last 
successful write and any pending replace commit.</li>
+   * </ul>
+   * @param metaClient table meta client
+   * @param currentInstant the instant for the write this client is attempting 
to commit
+   * @param lastSuccessfulInstant the last successful write before this commit 
started
+   * @return a stream of instants that are candidates for conflict resolution
+   */
+  private Stream<HoodieInstant> 
getCandidateInstantsV8AndAbove(HoodieTableMetaClient metaClient, HoodieInstant 
currentInstant,
+                                                               
Option<HoodieInstant> lastSuccessfulInstant) {
+    HoodieActiveTimeline activeTimeline = metaClient.getActiveTimeline();
+    boolean isMoRTable = metaClient.getTableType() == 
HoodieTableType.MERGE_ON_READ;
+    Stream<HoodieInstant> completedCommitsInstantStream = activeTimeline
+        .getCommitsTimeline()
+        .filterCompletedInstants()
+        .filter(instant -> !isMoRTable || 
!instant.getAction().equals(HoodieTimeline.COMMIT_ACTION))
+        .findInstantsAfter(lastSuccessfulInstant.isPresent() ? 
lastSuccessfulInstant.get().requestedTime() : HoodieTimeline.INIT_INSTANT_TS)

Review Comment:
   Similar here. The only thing that's different is the extra filter, so we 
could pull the rest into a private method shared by both paths and then apply 
the extra filter.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to