gabriel-farache commented on code in PR #4094: URL: https://github.com/apache/incubator-kie-kogito-runtimes/pull/4094#discussion_r2581560386
########## kogito-test-utils/src/main/java/org/kie/kogito/test/utils/JsonProcessInstanceLogAnalyzer.java: ########## @@ -0,0 +1,559 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.kie.kogito.test.utils; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.fail; + +/** + * Utility class for analyzing process instance aware logging in JSON format. + * Supports parsing JSON log format with MDC fields including processInstanceId. + * This class replaces pipe-delimited format parsing for machine-consumable JSON logs. 
+ */ +public class JsonProcessInstanceLogAnalyzer { + + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + // Common timestamp patterns in JSON logs + private static final DateTimeFormatter[] TIMESTAMP_FORMATTERS = { + DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), + DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSX"), + DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSS"), + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSS"), + DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss,SSS"), + DateTimeFormatter.ISO_LOCAL_DATE_TIME + }; + + /** + * Represents a single JSON log entry with all its components. + */ + public static class JsonLogEntry { + public final LocalDateTime timestamp; + public final String level; + public final String loggerName; + public final String message; + public final Map<String, String> mdc; + public final String threadName; + public final String sequenceNumber; + public final JsonNode rawJson; + + public JsonLogEntry(LocalDateTime timestamp, String level, String loggerName, + String message, Map<String, String> mdc, String threadName, + String sequenceNumber, JsonNode rawJson) { + this.timestamp = timestamp; + this.level = level != null ? level : "INFO"; + this.loggerName = loggerName != null ? loggerName : "unknown.logger"; + this.message = message != null ? message : ""; + this.mdc = mdc != null ? new HashMap<>(mdc) : new HashMap<>(); + this.threadName = threadName; + this.sequenceNumber = sequenceNumber; + this.rawJson = rawJson; + } + + /** + * Get the process instance ID from MDC. + */ + public String getProcessInstanceId() { + return mdc.get("processInstanceId"); + } + + /** + * Check if this log entry has a process instance ID. + */ + public boolean hasProcessInstance() { + String processInstanceId = getProcessInstanceId(); + return processInstanceId != null && !processInstanceId.trim().isEmpty(); + } + + /** + * Check if this log entry is general context (no process instance ID). 
+ */ + public boolean isGeneralContext() { + return !hasProcessInstance(); + } + + /** + * Get trace ID from MDC if available. + */ + public String getTraceId() { + return mdc.get("traceId"); + } + + /** + * Get span ID from MDC if available. + */ + public String getSpanId() { + return mdc.get("spanId"); + } + + @Override + public String toString() { + return String.format("JsonLogEntry{timestamp=%s, level=%s, processInstanceId=%s, logger=%s, message=%s}", + timestamp, level, getProcessInstanceId(), loggerName, message); + } + } + + /** + * Statistics about JSON log entries for analysis. + */ + public static class JsonLogStatistics { + public final long totalLogs; + public final long processSpecificLogs; + public final long generalContextLogs; + public final Map<String, Long> logsByProcessInstance; + public final Map<String, Long> logsByLevel; + public final Map<String, Long> logsByLogger; + public final long logsWithTracing; + + public JsonLogStatistics(List<JsonLogEntry> entries) { + this.totalLogs = entries.size(); + this.processSpecificLogs = entries.stream().filter(JsonLogEntry::hasProcessInstance).count(); + this.generalContextLogs = entries.stream().filter(JsonLogEntry::isGeneralContext).count(); + this.logsByProcessInstance = entries.stream() + .collect(Collectors.groupingBy( + entry -> entry.hasProcessInstance() ? 
entry.getProcessInstanceId() : "", + Collectors.counting())); + this.logsByLevel = entries.stream() + .collect(Collectors.groupingBy(entry -> entry.level, Collectors.counting())); + this.logsByLogger = entries.stream() + .collect(Collectors.groupingBy(entry -> entry.loggerName, Collectors.counting())); + this.logsWithTracing = entries.stream() + .filter(entry -> entry.getTraceId() != null) + .count(); + } + + @Override + public String toString() { + return String.format( + "JsonLogStatistics{total=%d, processSpecific=%d, general=%d, byProcess=%s, byLevel=%s, withTracing=%d}", + totalLogs, processSpecificLogs, generalContextLogs, logsByProcessInstance, logsByLevel, logsWithTracing); + } + } + + /** + * Parse JSON log file with multiline support and resilient error handling. + */ + public static List<JsonLogEntry> parseJsonLogFile(Path logFile) throws IOException { + List<String> lines = Files.readAllLines(logFile); + List<JsonLogEntry> entries = new ArrayList<>(); + AtomicInteger malformedLineCount = new AtomicInteger(0); + AtomicInteger lineNumber = new AtomicInteger(0); + + for (String line : lines) { + lineNumber.incrementAndGet(); + + if (line.trim().isEmpty()) { + continue; // Skip empty lines + } + + try { + JsonLogEntry entry = parseJsonLogLine(line, malformedLineCount, lineNumber.get()); + if (entry != null) { + entries.add(entry); + } + } catch (Exception e) { + malformedLineCount.incrementAndGet(); + System.err.printf("Warning: Failed to parse JSON log line %d: %s - Error: %s%n", + lineNumber.get(), line.substring(0, Math.min(100, line.length())), e.getMessage()); + } + } + + // Log statistics about parsing + if (malformedLineCount.get() > 0) { + System.err.printf("Warning: Encountered %d malformed/problematic lines out of %d total lines while parsing %s%n", + malformedLineCount.get(), lineNumber.get(), logFile.getFileName()); + } + + return entries; + } + + /** + * Parse a single JSON log line into a JsonLogEntry. 
+ */ + private static JsonLogEntry parseJsonLogLine(String line, AtomicInteger malformedLineCount, int lineNumber) { + try { + JsonNode jsonNode = OBJECT_MAPPER.readTree(line); + + // Extract timestamp + LocalDateTime timestamp = parseTimestamp(jsonNode, malformedLineCount); + + // Extract standard fields + String level = getTextValue(jsonNode, "level"); + String loggerName = getTextValue(jsonNode, "loggerName"); + String message = getTextValue(jsonNode, "message"); + String threadName = getTextValue(jsonNode, "thread"); + String sequenceNumber = getTextValue(jsonNode, "sequenceNumber"); + + // Extract MDC fields + Map<String, String> mdc = extractMdcFields(jsonNode); + + return new JsonLogEntry(timestamp, level, loggerName, message, mdc, threadName, sequenceNumber, jsonNode); + + } catch (JsonProcessingException e) { + // Try fallback parsing for non-JSON lines + return tryFallbackParsing(line, malformedLineCount, lineNumber); + } + } + + /** + * Extract MDC fields from JSON log entry. 
+ */ + private static Map<String, String> extractMdcFields(JsonNode jsonNode) { + Map<String, String> mdc = new HashMap<>(); + + // Look for MDC in common field names + JsonNode mdcNode = jsonNode.get("mdc"); + if (mdcNode == null) { + mdcNode = jsonNode.get("MDC"); + } + if (mdcNode == null) { + mdcNode = jsonNode.get("context"); + } + + if (mdcNode != null && mdcNode.isObject()) { + mdcNode.fields().forEachRemaining(entry -> { + String key = entry.getKey(); + JsonNode value = entry.getValue(); + if (value.isTextual()) { + mdc.put(key, value.asText()); + } else if (!value.isNull()) { + mdc.put(key, value.toString()); + } + }); + } + + // Also check for direct MDC fields at root level (some formats) + String[] commonMdcFields = { "processInstanceId", "traceId", "spanId", "userId", "correlationId" }; + for (String field : commonMdcFields) { + JsonNode fieldNode = jsonNode.get(field); + if (fieldNode != null && fieldNode.isTextual()) { + mdc.put(field, fieldNode.asText()); + } + } + + return mdc; + } + + /** + * Parse timestamp from JSON node using multiple format attempts. + */ + private static LocalDateTime parseTimestamp(JsonNode jsonNode, AtomicInteger malformedLineCount) { + String timestampStr = getTextValue(jsonNode, "timestamp"); + if (timestampStr == null) { + timestampStr = getTextValue(jsonNode, "@timestamp"); + } + if (timestampStr == null) { + timestampStr = getTextValue(jsonNode, "time"); + } + + if (timestampStr != null) { + for (DateTimeFormatter formatter : TIMESTAMP_FORMATTERS) { + try { + return LocalDateTime.parse(timestampStr, formatter); + } catch (DateTimeParseException e) { + // Try next formatter + } + } + } + + // Fallback to current time + malformedLineCount.incrementAndGet(); + return LocalDateTime.now(); + } + + /** + * Get text value from JSON node, handling null checks. 
+ */ + private static String getTextValue(JsonNode node, String fieldName) { + JsonNode fieldNode = node.get(fieldName); + return fieldNode != null && !fieldNode.isNull() ? fieldNode.asText() : null; + } + + /** + * Try fallback parsing for non-JSON lines (stack traces, etc.). + */ + private static JsonLogEntry tryFallbackParsing(String line, AtomicInteger malformedLineCount, int lineNumber) { + // This could be a stack trace or multiline continuation + // For now, create a simple entry + malformedLineCount.incrementAndGet(); + + Map<String, String> emptyMdc = new HashMap<>(); + return new JsonLogEntry( + LocalDateTime.now(), + "INFO", + "unknown.logger", + line, + emptyMdc, Review Comment: It's more that the parsed JSON from the log line may contain MDC/context data, and if that is the case, that data must be put in the `mdc` entry of the `JsonLogEntry`. So if the line parsing fails, as a fallback, we directly set the line as the string message, and as it is a String there is no way to parse the MDC/context data potentially held by the line. Having the empty MDC is an indication that the line is not proper JSON, but that's not the main goal here; having the `unknown.logger` logger name indicates the same. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
