[ https://issues.apache.org/jira/browse/GOBBLIN-2211?focusedWorklogId=975619&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-975619 ]
ASF GitHub Bot logged work on GOBBLIN-2211: ------------------------------------------- Author: ASF GitHub Bot Created on: 22/Jul/25 09:32 Start Date: 22/Jul/25 09:32 Worklog Time Spent: 10m Work Description: NamsB7 commented on code in PR #4121: URL: https://github.com/apache/gobblin/pull/4121#discussion_r2221896908 ########## gobblin-runtime/src/main/java/org/apache/gobblin/runtime/ErrorClassifier.java: ########## @@ -0,0 +1,256 @@ +package org.apache.gobblin.runtime; + +import java.io.IOException; +import java.time.ZonedDateTime; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; + +import javax.inject.Inject; +import lombok.extern.slf4j.Slf4j; + +import org.apache.gobblin.configuration.Category; +import org.apache.gobblin.configuration.ErrorPatternProfile; +import org.apache.gobblin.metastore.ErrorPatternStore; +import org.apache.gobblin.runtime.troubleshooter.Issue; +import org.apache.gobblin.runtime.troubleshooter.IssueSeverity; +import org.apache.gobblin.service.ServiceConfigKeys; +import org.apache.gobblin.util.ConfigUtils; + +import com.typesafe.config.Config; + + +/** + * Classifies issues by matching their summary description to error patterns and categories. + * Categorisation is based on regex patterns and their associated categories. + * Each category has an associated priority value. + */ +@Slf4j +public class ErrorClassifier { + private final List<CompiledErrorPattern> errorIssues; + private final Map<String, Category> categoryMap; + private ErrorPatternStore errorStore = null; + + private final int maxErrorsInFinalError; + private static final String DEFAULT_CODE = "T0000"; + private Category defaultCategory = null; + + /** + * Loads all error issues and categories from the store into memory. 
+ */ + @Inject + public ErrorClassifier(ErrorPatternStore store, Config config) + throws IOException { + this.errorStore = store; + + this.maxErrorsInFinalError = + ConfigUtils.getInt(config, ServiceConfigKeys.ERROR_CLASSIFICATION_MAX_ERRORS_IN_FINAL_KEY, + ServiceConfigKeys.DEFAULT_ERROR_CLASSIFICATION_MAX_ERRORS_IN_FINAL); + + //Obtaining Categories must be done before getting ErrorIssues, as it is used in ordering ErrorIssues by category priority. Review Comment: This is not important in the MySQL implementation. For the in-memory implementation of the pattern store, we need category priorities to order errors. Issue Time Tracking ------------------- Worklog Id: (was: 975619) Time Spent: 3h (was: 2h 50m) > Implement Error Classification based on execution issues > -------------------------------------------------------- > > Key: GOBBLIN-2211 > URL: https://issues.apache.org/jira/browse/GOBBLIN-2211 > Project: Apache Gobblin > Issue Type: Bug > Components: gobblin-service > Reporter: Abhishek Jain > Assignee: Abhishek Tiwari > Priority: Major > Time Spent: 3h > Remaining Estimate: 0h > > Implement Error Classification to categorize the failure reason based on > issues encountered. -- This message was sent by Atlassian Jira (v8.20.10#820010)