shangxinli commented on code in PR #18362: URL: https://github.com/apache/hudi/pull/18362#discussion_r2976547153
########## hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/validator/FlinkValidatorUtils.java: ########## @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.sink.validator; + +import org.apache.hudi.client.WriteStatus; +import org.apache.hudi.client.validator.BasePreCommitValidator; +import org.apache.hudi.client.validator.ValidationContext; +import org.apache.hudi.common.config.TypedProperties; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.HoodieWriteStat; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ReflectionUtils; +import org.apache.hudi.common.util.StringUtils; +import org.apache.hudi.config.HoodiePreCommitValidatorConfig; +import org.apache.hudi.exception.HoodieValidationException; + +import lombok.extern.slf4j.Slf4j; +import org.apache.flink.configuration.Configuration; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Utility for running pre-commit validators in the Flink commit flow. 
+ * + * <p>Instantiates and executes validators configured via + * {@code hoodie.precommit.validators}. Each validator must extend + * {@link BasePreCommitValidator} and have a constructor that accepts + * {@link TypedProperties}.</p> + * + * <p>Called from {@code StreamWriteOperatorCoordinator.doCommit()} before + * the commit is finalized.</p> + */ +@Slf4j +public class FlinkValidatorUtils { + + /** + * Run all configured pre-commit validators. + * + * @param conf Flink configuration containing validator class names + * @param instant Commit instant time + * @param allWriteStatus Write statuses from all operators + * @param checkpointCommitMetadata Extra metadata being committed (contains checkpoint info) + * @param previousCommitMetadata Metadata from the previous completed commit + * @throws HoodieValidationException if any validator fails with FAIL policy + */ + public static void runValidators(Configuration conf, + String instant, + List<WriteStatus> allWriteStatus, + Map<String, String> checkpointCommitMetadata, + Option<HoodieCommitMetadata> previousCommitMetadata) { + String validatorClassNames = conf.getString( + HoodiePreCommitValidatorConfig.VALIDATOR_CLASS_NAMES.key(), + HoodiePreCommitValidatorConfig.VALIDATOR_CLASS_NAMES.defaultValue()); + + if (StringUtils.isNullOrEmpty(validatorClassNames)) { + return; Review Comment: Changed the previousCommitMetadata parameter from Option<HoodieCommitMetadata> to Supplier<Option<HoodieCommitMetadata>>, so that it is evaluated lazily, only after the empty-validators early return. ########## hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/sink/StreamWriteOperatorCoordinator.java: ########## @@ -658,6 +666,26 @@ private void doCommit(long checkpointId, String instant, List<WriteStatus> dataW } } + /** + * Get commit metadata from the last completed commit on the timeline. + * Used for pre-commit validation to compare current commit against previous. 
+ */ + private Option<HoodieCommitMetadata> getPreviousCommitMetadata() { Review Comment: Moved getPreviousCommitMetadata() to StreamerUtil as a public static method. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
