CosmosNi commented on code in PR #9445:
URL: https://github.com/apache/seatunnel/pull/9445#discussion_r2163030594


##########
seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/validator/DataValidatorTransform.java:
##########
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.transform.validator;
+
+import org.apache.seatunnel.api.configuration.ReadonlyConfig;
+import org.apache.seatunnel.api.table.catalog.CatalogTable;
+import org.apache.seatunnel.api.table.catalog.Column;
+import org.apache.seatunnel.api.table.catalog.ConstraintKey;
+import org.apache.seatunnel.api.table.catalog.PrimaryKey;
+import org.apache.seatunnel.api.table.catalog.TableIdentifier;
+import org.apache.seatunnel.api.table.catalog.TableSchema;
+import org.apache.seatunnel.api.table.type.SeaTunnelRow;
+import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
+import org.apache.seatunnel.common.exception.CommonErrorCode;
+import 
org.apache.seatunnel.transform.common.AbstractCatalogSupportMapTransform;
+import org.apache.seatunnel.transform.common.ErrorHandleWay;
+import org.apache.seatunnel.transform.common.TransformCommonOptions;
+import org.apache.seatunnel.transform.exception.TransformException;
+import 
org.apache.seatunnel.transform.validator.ValidationResultHandler.ValidationProcessResult;
+
+import org.apache.commons.collections4.map.SingletonMap;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/** DataValidator Transform for validating field values according to 
configured rules. */
+@Slf4j
+public class DataValidatorTransform extends AbstractCatalogSupportMapTransform 
{
+    public static final String PLUGIN_NAME = "DataValidator";
+
+    private final DataValidatorTransformConfig config;
+    private final List<FieldValidator> fieldValidators;
+    private final ValidationResultHandler resultHandler;
+    private final ErrorHandleWay errorHandleWay;
+    private final String errorTable;
+
+    public DataValidatorTransform(ReadonlyConfig readonlyConfig, CatalogTable 
catalogTable) {
+        super(catalogTable);
+        this.config = DataValidatorTransformConfig.of(readonlyConfig);
+        this.errorHandleWay =
+                readonlyConfig
+                        
.getOptional(TransformCommonOptions.ROW_ERROR_HANDLE_WAY_OPTION)
+                        .orElse(ErrorHandleWay.FAIL);
+        this.errorTable =
+                
readonlyConfig.getOptional(TransformCommonOptions.ERROR_TABLE_OPTION).orElse(null);
+        this.resultHandler = new ValidationResultHandler();
+        this.fieldValidators = initializeFieldValidators();
+    }
+
+    @Override
+    protected SeaTunnelRow transformRow(SeaTunnelRow inputRow) {
+        // Execute validation for all fields
+        Map<String, List<ValidationResult>> fieldResults = new HashMap<>();
+        ValidationContext context =
+                new ValidationContext(
+                        inputRow,
+                        
inputCatalogTable.getTableSchema().toPhysicalRowDataType(),
+                        new HashMap<>(),
+                        null);
+
+        // Always validate all fields (no fail fast)
+        for (FieldValidator validator : fieldValidators) {
+            String fieldName = validator.getFieldName();
+            Object fieldValue = inputRow.getField(validator.getFieldIndex());
+
+            // Update context with current field name
+            ValidationContext fieldContext =
+                    new ValidationContext(
+                            inputRow,
+                            
inputCatalogTable.getTableSchema().toPhysicalRowDataType(),
+                            context.getGlobalContext(),
+                            fieldName);
+
+            List<ValidationResult> results = validator.validate(fieldValue, 
fieldContext, false);
+            fieldResults.put(fieldName, results);
+        }
+
+        // Process validation results
+        ValidationProcessResult processResult =
+                resultHandler.processResults(inputRow, fieldResults);
+
+        // Handle validation failures
+        if (!processResult.isValid()) {
+            log.error(
+                    "Validation failed for row: {}",
+                    String.join("; ", processResult.getErrorMessages()));
+
+            if (errorHandleWay == ErrorHandleWay.FAIL) {
+                Map<String, String> params =
+                        new SingletonMap<>(
+                                "message",
+                                "Validation failed: "
+                                        + String.join("; ", 
processResult.getErrorMessages()));
+                throw new 
TransformException(CommonErrorCode.VALIDATION_FAILED, params);
+            } else if (errorHandleWay == ErrorHandleWay.SKIP) {
+                return null; // Skip this row
+            } else if (errorHandleWay.allowRouteToTable()) {
+                // Route invalid data to error table by setting tableId
+                if (errorTable != null && !errorTable.isEmpty()) {
+                    SeaTunnelRow errorRow = inputRow.copy();
+                    errorRow.setTableId(errorTable);
+                    log.debug("Routing invalid data to error table: {}", 
errorTable);
+                    return errorRow;

Review Comment:
   How about keeping only three fields: source_table_id, original_data, and 
validation_errors?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to