danny0405 commented on code in PR #13229:
URL: https://github.com/apache/hudi/pull/13229#discussion_r2076583982


##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java:
##########
@@ -514,23 +555,72 @@ public boolean purgePendingClustering(String 
clusteringInstant) {
     return false;
   }
 
+  protected abstract HoodieWriteMetadata<O> 
convertToOutputMetadata(HoodieWriteMetadata<T> writeMetadata);
+
+  private Map<String, List<String>> 
getPartitionToReplacedFileIds(HoodieClusteringPlan clusteringPlan, 
HoodieWriteMetadata<?> writeMetadata) {
+    Set<HoodieFileGroupId> newFilesWritten = 
writeMetadata.getWriteStats().get().stream()
+        .map(s -> new HoodieFileGroupId(s.getPartitionPath(), 
s.getFileId())).collect(Collectors.toSet());
+
+    return ClusteringUtils.getFileGroupsFromClusteringPlan(clusteringPlan)
+        .filter(fg -> 
"org.apache.hudi.client.clustering.run.strategy.SparkSingleFileSortExecutionStrategy"
+            .equals(config.getClusteringExecutionStrategyClass())
+            || !newFilesWritten.contains(fg))
+        .collect(Collectors.groupingBy(HoodieFileGroupId::getPartitionPath, 
Collectors.mapping(HoodieFileGroupId::getFileId, Collectors.toList())));
+  }
+
   /**
-   * Delete expired partition by config.
-   *
-   * @param instantTime Instant Time for the action
-   * @return HoodieWriteMetadata
+   * Check if any validators are configured and run those validations. If any 
of the validations fail, throws HoodieValidationException.
    */
-  public HoodieWriteMetadata<T> managePartitionTTL(String instantTime) {
-    HoodieTable<?, I, ?, T> table = createTable(config, 
context.getStorageConf());
-    return table.managePartitionTTL(context, instantTime);
+  protected void runPrecommitValidationForClustering(HoodieWriteMetadata<O> 
writeMetadata, HoodieTable table, String instantTime) {
+    if (StringUtils.isNullOrEmpty(config.getPreCommitValidators())) {
+      return;
+    }
+    throw new HoodieIOException("Precommit validation not implemented for all 
engines yet");
   }
 
-  protected abstract HoodieWriteMetadata<O> 
convertToOutputMetadata(HoodieWriteMetadata<T> writeMetadata);
+  private void commitClustering(HoodieWriteMetadata<O> clusteringWriteMetadata,
+                           HoodieTable table,
+                           String clusteringCommitTime) {
+    // triggering the dag for the first time for clustering
+    List<HoodieWriteStat> writeStats = 
triggerWritesAndFetchWriteStats(clusteringWriteMetadata);
+    clusteringWriteMetadata.setWriteStats(writeStats);
+
+    HoodieClusteringPlan clusteringPlan = 
ClusteringUtils.getPendingClusteringPlan(table.getMetaClient(), 
clusteringCommitTime);
+    
clusteringWriteMetadata.setPartitionToReplaceFileIds(getPartitionToReplacedFileIds(clusteringPlan,
 clusteringWriteMetadata));
+    Option<Schema> schema;
+    try {
+      schema = new 
TableSchemaResolver(table.getMetaClient()).getTableAvroSchemaIfPresent(false);
+    } catch (Exception ex) {
+      throw new HoodieSchemaException(ex);
+    }
+    // Create HoodieCommitMetadata w/ all required info except 
HoodieWriteStats which will be populated later when dag is triggered.
+    HoodieCommitMetadata commitMetadata = 
CommitUtils.buildMetadata(Collections.emptyList(), 
clusteringWriteMetadata.getPartitionToReplaceFileIds(),
+        Option.empty(), WriteOperationType.CLUSTER, schema.get().toString(), 
HoodieTimeline.CLUSTERING_ACTION);
+    clusteringWriteMetadata.setCommitMetadata(Option.of(commitMetadata));
+    HoodieReplaceCommitMetadata replaceCommitMetadata = 
(HoodieReplaceCommitMetadata) clusteringWriteMetadata.getCommitMetadata().get();
+    for (HoodieWriteStat writeStat: writeStats) {
+      replaceCommitMetadata.addWriteStat(writeStat.getPartitionPath(), 
writeStat);
+    }
+    
clusteringWriteMetadata.setCommitMetadata(Option.of(replaceCommitMetadata));
+    runPrecommitValidationForClustering(clusteringWriteMetadata, table, 
clusteringCommitTime);

Review Comment:
   Can we fix the validation for clustering so that it is the same as for 
compaction? It seems `runPrecommitValidationForClustering` does not really belong here.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to