Hisoka-X commented on code in PR #9445:
URL: https://github.com/apache/seatunnel/pull/9445#discussion_r2222253222


##########
seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCode.java:
##########
@@ -26,6 +26,7 @@ public enum CommonErrorCode implements SeaTunnelErrorCode {
     UNSUPPORTED_DATA_TYPE(
             "COMMON-07", "'<identifier>' unsupported data type '<dataType>' of 
'<field>'"),
     UNSUPPORTED_ENCODING("COMMON-08", "unsupported encoding '<encoding>'"),
+    VALIDATION_FAILED("COMMON-09", "Data validation failed: '<message>'"),

Review Comment:
   Please set the code to `COMMON-38`, because we already have code `COMMON-09` 
in 
https://github.com/apache/seatunnel/blob/94bb6350df4ced5661268795aa8c43804983e196/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonErrorCodeDeprecated.java#L34



##########
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/validator/DataValidatorTransform.java:
##########
@@ -0,0 +1,314 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.transform.validator;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.PhysicalColumn;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.type.BasicType;
+import org.apache.seatunnel.api.table.type.LocalTimeType;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.common.exception.CommonErrorCode;
+import org.apache.seatunnel.common.utils.JsonUtils;
+import 
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform;
+import org.apache.seatunnel.transform.common.ErrorHandleWay;
+import org.apache.seatunnel.transform.common.TransformCommonOptions;
+import org.apache.seatunnel.transform.exception.TransformException;
+import 
org.apache.seatunnel.transform.validator.ValidationResultHandler.ValidationProcessResult;
+
+import org.apache.commons.collections4.map.SingletonMap;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/** DataValidator Transform for validating field values according to 
configured rules. */
+@Slf4j
+public class DataValidatorTransform extends AbstractCatalogSupportMapTransform 
{
+    public static final String PLUGIN_NAME = "DataValidator";
+    public static final String SOURCE_TABLE_ID = "source_table_id";
+    public static final String SOURCE_TABLE_PATH = "source_table_path";
+    public static final String ORIGINAL_DATA = "original_data";
+    public static final String VALIDATION_ERRORS = "validation_errors";
+    public static final String CREATE_TIME = "create_time";
+
+    private final DataValidatorTransformConfig config;
+    private final List<FieldValidator> fieldValidators;
+    private final ValidationResultHandler resultHandler;
+    private final ErrorHandleWay errorHandleWay;
+    private final String errorTable;
+
+    public DataValidatorTransform(ReadonlyConfig readonlyConfig, CatalogTable 
catalogTable) {
+        super(catalogTable);
+        this.config = DataValidatorTransformConfig.of(readonlyConfig);
+        this.errorHandleWay =
+                readonlyConfig
+                        
.getOptional(TransformCommonOptions.ROW_ERROR_HANDLE_WAY_OPTION)
+                        .orElse(ErrorHandleWay.FAIL);
+        this.errorTable =
+                
readonlyConfig.getOptional(TransformCommonOptions.ERROR_TABLE_OPTION).orElse(null);
+        this.resultHandler = new ValidationResultHandler();
+        this.fieldValidators = initializeFieldValidators();
+    }
+
+    @Override
+    protected SeaTunnelRow transformRow(SeaTunnelRow inputRow) {
+        // Execute validation for all fields
+        Map<String, List<ValidationResult>> fieldResults = new HashMap<>();
+        ValidationContext context =
+                new ValidationContext(
+                        inputRow,
+                        
inputCatalogTable.getTableSchema().toPhysicalRowDataType(),
+                        new HashMap<>(),
+                        null);
+
+        // Always validate all fields (no fail fast)
+        for (FieldValidator validator : fieldValidators) {
+            String fieldName = validator.getFieldName();
+            Object fieldValue = inputRow.getField(validator.getFieldIndex());
+
+            // Update context with current field name
+            ValidationContext fieldContext =
+                    new ValidationContext(
+                            inputRow,
+                            
inputCatalogTable.getTableSchema().toPhysicalRowDataType(),
+                            context.getGlobalContext(),
+                            fieldName);
+
+            List<ValidationResult> results = validator.validate(fieldValue, 
fieldContext, false);
+            fieldResults.put(fieldName, results);
+        }
+
+        // Process validation results
+        ValidationProcessResult processResult =
+                resultHandler.processResults(inputRow, fieldResults);
+
+        // Handle validation failures
+        if (!processResult.isValid()) {
+            log.error(
+                    "Validation failed for row: {}",
+                    String.join("; ", processResult.getErrorMessages()));
+
+            if (errorHandleWay == ErrorHandleWay.FAIL) {
+                Map<String, String> params =
+                        new SingletonMap<>(
+                                "message",
+                                "Validation failed: "
+                                        + String.join("; ", 
processResult.getErrorMessages()));
+                throw new 
TransformException(CommonErrorCode.VALIDATION_FAILED, params);

Review Comment:
   Please refer to 
https://github.com/apache/seatunnel/blob/94bb6350df4ced5661268795aa8c43804983e196/seatunnel-common/src/main/java/org/apache/seatunnel/common/exception/CommonError.java#L61
 to create a new common error.



##########
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/common/TransformCommonOptions.java:
##########
@@ -66,4 +69,11 @@ public class TransformCommonOptions {
                                     + "When fail is selected, data format 
error will block and an exception will be thrown. "
                                     + "When skip is selected, data format 
error will skip this column data."
                                     + "When skip_row is selected, data format 
error will skip this line data.");
+
+    public static final Option<String> ERROR_TABLE_OPTION =
+            Options.key("error_table")

Review Comment:
   ```suggestion
               Options.key("row_error_handle_way.error_table")
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to