[ https://issues.apache.org/jira/browse/GOBBLIN-2211?focusedWorklogId=975615&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-975615 ]
ASF GitHub Bot logged work on GOBBLIN-2211:
-------------------------------------------

                Author: ASF GitHub Bot
            Created on: 22/Jul/25 09:29
            Start Date: 22/Jul/25 09:29
    Worklog Time Spent: 10m
      Work Description: NamsB7 commented on code in PR #4121:
URL: https://github.com/apache/gobblin/pull/4121#discussion_r2221888972


##########
gobblin-runtime/src/main/java/org/apache/gobblin/runtime/ErrorClassifier.java:
##########
@@ -0,0 +1,256 @@
+package org.apache.gobblin.runtime;
+
+import java.io.IOException;
+import java.time.ZonedDateTime;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import javax.inject.Inject;
+import lombok.extern.slf4j.Slf4j;
+
+import org.apache.gobblin.configuration.Category;
+import org.apache.gobblin.configuration.ErrorPatternProfile;
+import org.apache.gobblin.metastore.ErrorPatternStore;
+import org.apache.gobblin.runtime.troubleshooter.Issue;
+import org.apache.gobblin.runtime.troubleshooter.IssueSeverity;
+import org.apache.gobblin.service.ServiceConfigKeys;
+import org.apache.gobblin.util.ConfigUtils;
+
+import com.typesafe.config.Config;
+
+
+/**
+ * Classifies issues by matching their summary description to error patterns and categories.
+ * Categorisation is based on regex patterns and their associated categories.
+ * Each category has an associated priority value.
+ */
+@Slf4j
+public class ErrorClassifier {
+  private final List<CompiledErrorPattern> errorIssues;
+  private final Map<String, Category> categoryMap;
+  private ErrorPatternStore errorStore = null;
+
+  private final int maxErrorsInFinalError;
+  private static final String DEFAULT_CODE = "T0000";
+  private Category defaultCategory = null;
+
+  /**
+   * Loads all error issues and categories from the store into memory.
+   */
+  @Inject
+  public ErrorClassifier(ErrorPatternStore store, Config config)
+      throws IOException {
+    this.errorStore = store;
+
+    this.maxErrorsInFinalError =
+        ConfigUtils.getInt(config, ServiceConfigKeys.ERROR_CLASSIFICATION_MAX_ERRORS_IN_FINAL_KEY,
+            ServiceConfigKeys.DEFAULT_ERROR_CLASSIFICATION_MAX_ERRORS_IN_FINAL);
+
+    //Obtaining Categories must be done before getting ErrorIssues, as it is used in ordering ErrorIssues by category priority.
+    this.categoryMap = new HashMap<>();
+    for (Category cat : this.errorStore.getAllErrorCategories()) {
+      categoryMap.put(cat.getCategoryName(), cat);
+    }
+
+    this.errorIssues = new ArrayList<>();
+    for (ErrorPatternProfile issue : this.errorStore.getAllErrorIssuesOrderedByCategoryPriority()) {

Review Comment:
   Done

Issue Time Tracking
-------------------

    Worklog Id:     (was: 975615)
    Time Spent: 2h 20m  (was: 2h 10m)

> Implement Error Classification based on execution issues
> --------------------------------------------------------
>
>                 Key: GOBBLIN-2211
>                 URL: https://issues.apache.org/jira/browse/GOBBLIN-2211
>             Project: Apache Gobblin
>          Issue Type: Bug
>          Components: gobblin-service
>            Reporter: Abhishek Jain
>            Assignee: Abhishek Tiwari
>            Priority: Major
>          Time Spent: 2h 20m
>  Remaining Estimate: 0h
>
> Implement Error Classification to categorize the failure reason based on
> issues encountered.

--
This message was sent by Atlassian Jira
(v8.20.10#820010)