Re: [PR] UNOMI-944: Add IT developer tooling for run archival, cross-run comparison and live Karaf inspection [unomi]

via GitHub Sun, 07 Jun 2026 06:17:42 -0700


Copilot commented on code in PR #770:
URL: https://github.com/apache/unomi/pull/770#discussion_r3369408858



##########
itests/it-run-lib.sh:
##########
@@ -0,0 +1,442 @@
+# Shared helpers for IT run scripts (sourced, not executed).
+
+IT_SCRIPT_DIR=""
+IT_ARCHIVES_DIR=""
+
+IT_ARCHIVES_DIR_NAME="archives"
+IT_RUN_DIR_GLOB="it-run-*"
+IT_TEST_RESULTS="test-results.tsv"
+IT_RUN_SUMMARY="run-summary.properties"
+IT_RUNS_INDEX="runs-index.tsv"
+IT_FAILSAFE_XML="TEST-org.apache.unomi.itests.AllITs.xml"
+IT_LATEST_COMPARISON="latest-comparison.txt"
+IT_CAPTURE_COMPARISON="comparison-last-3.txt"
+IT_AUTO_COMPARE_LAST=3
+IT_RUN_FINGERPRINT_FIELD="run.fingerprint"
+IT_TRACE_FINGERPRINT_FIELDS='^(search\.engine|search\.heap|karaf\.heap|single\.test|use\.opensearch|maven\.exit\.code)='
+IT_RUN_SUMMARY_EXCERPT_FIELDS='^(run\.captured|search\.engine|it\.karaf\.heap|elasticsearch\.heap|opensearch\.heap|tests\.failures|tests\.errors|operator\.note)='
+IT_ENGINE_PORT_FILES=(
+    elasticsearch-port.properties
+    opensearch-port.properties
+)
+IT_RUN_CONTEXT_NO_OPERATOR_NOTE='(none — pass --message "..." to describe run 
conditions, e.g. heavy swap, CI runner, single-test rerun)'
+
+it_run_lib_init() {
+    IT_SCRIPT_DIR="$1"
+    IT_ARCHIVES_DIR="$IT_SCRIPT_DIR/$IT_ARCHIVES_DIR_NAME"
+}
+
+it_run_tools_init() {
+    local script_dir="$1"
+    cd "$script_dir"
+    it_run_lib_init "$script_dir"
+}
+
+it_utc_now() {
+    date -u +%Y-%m-%dT%H:%M:%SZ
+}
+
+it_hostname() {
+    hostname 2>/dev/null || echo unknown
+}
+
+it_dir_has_files() {
+    [ -d "$1" ] && [ -n "$(ls -A "$1" 2>/dev/null)" ]
+}
+
+it_extract_xml_property() {
+    local file="$1"
+    local property="$2"
+
+    grep -m1 "name=\"$property\"" "$file" 2>/dev/null \
+        | sed -n 's/.*value="\([^"]*\)".*/\1/p'
+}
+
+it_read_pom_profile_property() {
+    local pom="$1"
+    local profile="$2"
+    local property="$3"
+
+    awk -v profile="$profile" -v prop="$property" '
+        $0 ~ "<id>" profile "</id>" { in_profile=1 }
+        in_profile && $0 ~ "<" prop ">" {
+            gsub(/.*<[^>]+>/, "")
+            gsub(/<.*/, "")
+            print
+            exit
+        }
+        in_profile && $0 ~ "</profile>" { exit }
+    ' "$pom"
+}
+
+it_infer_search_engine() {
+    local target_dir="$1"
+
+    if it_dir_has_files "$target_dir/opensearch0/logs"; then
+        echo "opensearch"
+    elif it_dir_has_files "$target_dir/elasticsearch0/logs"; then
+        echo "elasticsearch"
+    elif [ -f "$target_dir/opensearch-port.properties" ]; then
+        echo "opensearch"
+    elif [ -f "$target_dir/elasticsearch-port.properties" ]; then
+        echo "elasticsearch"
+    else
+        echo "unknown"
+    fi
+}
+
+it_run_label() {
+    basename "$1"
+}
+
+it_read_properties_field() {
+    local file="$1"
+    local field="$2"
+
+    if [ ! -f "$file" ]; then
+        echo ""
+        return
+    fi
+    grep -m1 "^${field}=" "$file" 2>/dev/null | cut -d= -f2-
+}

Review Comment:
   `it_read_properties_field` uses a regex pattern built from the raw field 
name. Since fields like `run.fingerprint` contain `.` (regex wildcard), this 
can match unintended keys and return the wrong value.



##########
itests/it-run-ui.sh:
##########
@@ -0,0 +1,255 @@
+# Presentation layer for IT run tooling (sourced, not executed directly).
+
+UI_USE_COLOR=0
+UI_SPINNER_PID=""
+
+ui_init() {
+    UI_USE_COLOR=0
+    if [ -t 1 ] && [ -z "${NO_COLOR:-}" ] && [ -z "${CI:-}" ]; then
+        UI_USE_COLOR=1
+    fi
+}
+
+ui__color_enabled() {
+    [ "$UI_USE_COLOR" -eq 1 ]
+}
+
+ui__fg() {
+    local code="$1"
+    ui__color_enabled && printf '\033[%sm' "$code"
+}
+
+ui__reset() { ui__fg 0; }
+ui__bold()  { ui__fg 1; }
+ui__dim()   { ui__fg 2; }
+ui__red()   { ui__fg 31; }
+ui__grn()   { ui__fg 32; }
+ui__ylw()   { ui__fg 33; }
+ui__blu()   { ui__fg 34; }
+ui__mag()   { ui__fg 35; }
+ui__cyn()   { ui__fg 36; }
+
+ui__line() {
+    local glyph="$1"
+    local color_fn="$2"
+    local indent="$3"
+    local message="$4"
+    local stream="${5:-1}"
+    local plain_label="${6:-}"
+
+    if ui__color_enabled; then
+        $color_fn
+        printf '%s%s ' "$indent" "$glyph"
+        ui__reset
+        printf '%s\n' "$message" >&"$stream"
+    elif [ "$stream" -eq 2 ]; then
+        printf '%s: %s\n' "$plain_label" "$message" >&2
+    else
+        printf '%s%s\n' "$indent" "$message"
+    fi
+}
+
+ui__glyph_status() {
+    local ok="$1"
+    local message="$2"
+    local stream="${3:-2}"
+
+    if ui__color_enabled; then
+        if [ "$ok" -eq 0 ]; then
+            ui__grn; printf '  ✔ '; ui__reset
+        else
+            ui__red; printf '  ✖ '; ui__reset
+            message="$message (failed)"
+        fi
+        echo "$message" >&"$stream"
+    elif [ "$ok" -ne 0 ]; then
+        echo "ERROR: $message" >&"$stream"
+    fi

Review Comment:
   In `ui__glyph_status`, the check/cross glyph is printed to stdout even when 
the message is sent to stderr (stream=2), splitting a single status line across 
streams.



##########
itests/it-run-compare-lib.sh:
##########
@@ -0,0 +1,301 @@
+# Cross-run comparison engine (sourced by compare-it-runs.sh and 
archive-it-run.sh).
+

Review Comment:
   This new shell library is missing the standard ASF license header comment 
block that other scripts in `itests/` include (e.g. `jacoco-report.sh`).



##########
itests/it-run-bootstrap.sh:
##########
@@ -0,0 +1,18 @@
+# Source all IT run tooling modules (sourced, not executed).
+

Review Comment:
   This new shell library is missing the standard ASF license header comment 
block that other scripts in `itests/` include (e.g. `jacoco-report.sh`).



##########
itests/it-run-context-lib.sh:
##########
@@ -0,0 +1,139 @@
+# Run context and manifest assembly for archive-it-run.sh.
+# Expects archive hooks: target_path, staging_path, log_staged_file, 
mark_included,
+# and globals: SCRIPT_DIR, REPO_ROOT, TARGET_DIR, MANIFEST, OUTPUT, 
RUN_MESSAGE,
+# RUN_ID, FULL_KARAF, CREATE_TAR, RUN_CONTEXT_REL, RUN_TRACE_FILE.

Review Comment:
   This new shell library is missing the standard ASF license header comment 
block that other scripts in `itests/` include (e.g. `jacoco-report.sh`).



##########
itests/it-run-ui.sh:
##########
@@ -0,0 +1,255 @@
+# Presentation layer for IT run tooling (sourced, not executed directly).
+

Review Comment:
   This new shell library is missing the standard ASF license header comment 
block that other scripts in `itests/` include (e.g. `jacoco-report.sh`).



##########
itests/it-run-ui.sh:
##########
@@ -0,0 +1,255 @@
+# Presentation layer for IT run tooling (sourced, not executed directly).
+
+UI_USE_COLOR=0
+UI_SPINNER_PID=""
+
+ui_init() {
+    UI_USE_COLOR=0
+    if [ -t 1 ] && [ -z "${NO_COLOR:-}" ] && [ -z "${CI:-}" ]; then
+        UI_USE_COLOR=1
+    fi
+}
+
+ui__color_enabled() {
+    [ "$UI_USE_COLOR" -eq 1 ]
+}
+
+ui__fg() {
+    local code="$1"
+    ui__color_enabled && printf '\033[%sm' "$code"
+}
+
+ui__reset() { ui__fg 0; }
+ui__bold()  { ui__fg 1; }
+ui__dim()   { ui__fg 2; }
+ui__red()   { ui__fg 31; }
+ui__grn()   { ui__fg 32; }
+ui__ylw()   { ui__fg 33; }
+ui__blu()   { ui__fg 34; }
+ui__mag()   { ui__fg 35; }
+ui__cyn()   { ui__fg 36; }
+
+ui__line() {
+    local glyph="$1"
+    local color_fn="$2"
+    local indent="$3"
+    local message="$4"
+    local stream="${5:-1}"
+    local plain_label="${6:-}"
+
+    if ui__color_enabled; then
+        $color_fn
+        printf '%s%s ' "$indent" "$glyph"
+        ui__reset
+        printf '%s\n' "$message" >&"$stream"
+    elif [ "$stream" -eq 2 ]; then
+        printf '%s: %s\n' "$plain_label" "$message" >&2
+    else
+        printf '%s%s\n' "$indent" "$message"
+    fi

Review Comment:
   In `ui__line`, the glyph/color prefix is printed to stdout while the message 
may be printed to stderr (stream=2). This splits a single log line across two 
streams and makes output hard to read/pipe reliably.



##########
build.sh:
##########
@@ -985,16 +1034,25 @@ $MVN_CMD clean $MVN_OPTS || {
     exit 1
 }
 
+if [ "$RUN_INTEGRATION_TESTS" = true ]; then
+    write_it_run_trace_start
+fi
+
 print_progress $((++current_step)) $total_steps "Compiling and installing 
artifacts..."
 if [ "$HAS_COLORS" -eq 1 ]; then
     echo -e "${GRAY}Running: $MVN_CMD install $MVN_OPTS${NC}"
 else
     echo "Running: $MVN_CMD install $MVN_OPTS"
 fi
-$MVN_CMD install $MVN_OPTS || {
+$MVN_CMD install $MVN_OPTS
+INSTALL_EXIT=$?
+if [ "$RUN_INTEGRATION_TESTS" = true ]; then
+    finalize_it_run_trace "$INSTALL_EXIT"
+fi
+if [ "$INSTALL_EXIT" -ne 0 ]; then
     print_status "error" "Maven install failed"
     exit 1
-}
+fi

Review Comment:
   Because build.sh runs with `set -e`, executing `mvn install` as a standalone 
command will cause an immediate exit on failure before `INSTALL_EXIT` is 
captured and before `finalize_it_run_trace` runs. This breaks the intended 
trace finalization and also bypasses the existing error reporting path.



##########
itests/it-run-karaf-lib.sh:
##########
@@ -0,0 +1,748 @@
+# Karaf log triage for archive-it-run.sh (supports log4j2 rollover: karaf.log, 
karaf.log.1, …).
+# Expects archive hooks: target_path, staging_path, log_staged_file, 
register_staged_file,
+# mark_included, ui_spinner_run, ui_detail, line_matches_expected_pattern, 
PATTERNS_FILE,
+# TARGET_DIR, FULL_KARAF, MANIFEST.

Review Comment:
   This new shell library is missing the standard ASF license header comment 
block that other scripts in `itests/` include (e.g. `jacoco-report.sh`).



##########
itests/it-run-lib.sh:
##########
@@ -0,0 +1,442 @@
+# Shared helpers for IT run scripts (sourced, not executed).
+
+IT_SCRIPT_DIR=""
+IT_ARCHIVES_DIR=""
+
+IT_ARCHIVES_DIR_NAME="archives"
+IT_RUN_DIR_GLOB="it-run-*"
+IT_TEST_RESULTS="test-results.tsv"
+IT_RUN_SUMMARY="run-summary.properties"
+IT_RUNS_INDEX="runs-index.tsv"
+IT_FAILSAFE_XML="TEST-org.apache.unomi.itests.AllITs.xml"
+IT_LATEST_COMPARISON="latest-comparison.txt"
+IT_CAPTURE_COMPARISON="comparison-last-3.txt"
+IT_AUTO_COMPARE_LAST=3
+IT_RUN_FINGERPRINT_FIELD="run.fingerprint"
+IT_TRACE_FINGERPRINT_FIELDS='^(search\.engine|search\.heap|karaf\.heap|single\.test|use\.opensearch|maven\.exit\.code)='
+IT_RUN_SUMMARY_EXCERPT_FIELDS='^(run\.captured|search\.engine|it\.karaf\.heap|elasticsearch\.heap|opensearch\.heap|tests\.failures|tests\.errors|operator\.note)='
+IT_ENGINE_PORT_FILES=(
+    elasticsearch-port.properties
+    opensearch-port.properties
+)
+IT_RUN_CONTEXT_NO_OPERATOR_NOTE='(none — pass --message "..." to describe run 
conditions, e.g. heavy swap, CI runner, single-test rerun)'
+
+it_run_lib_init() {
+    IT_SCRIPT_DIR="$1"
+    IT_ARCHIVES_DIR="$IT_SCRIPT_DIR/$IT_ARCHIVES_DIR_NAME"
+}
+
+it_run_tools_init() {
+    local script_dir="$1"
+    cd "$script_dir"
+    it_run_lib_init "$script_dir"
+}
+
+it_utc_now() {
+    date -u +%Y-%m-%dT%H:%M:%SZ
+}
+
+it_hostname() {
+    hostname 2>/dev/null || echo unknown
+}
+
+it_dir_has_files() {
+    [ -d "$1" ] && [ -n "$(ls -A "$1" 2>/dev/null)" ]
+}
+
+it_extract_xml_property() {
+    local file="$1"
+    local property="$2"
+
+    grep -m1 "name=\"$property\"" "$file" 2>/dev/null \
+        | sed -n 's/.*value="\([^"]*\)".*/\1/p'
+}
+
+it_read_pom_profile_property() {
+    local pom="$1"
+    local profile="$2"
+    local property="$3"
+
+    awk -v profile="$profile" -v prop="$property" '
+        $0 ~ "<id>" profile "</id>" { in_profile=1 }
+        in_profile && $0 ~ "<" prop ">" {
+            gsub(/.*<[^>]+>/, "")
+            gsub(/<.*/, "")
+            print
+            exit
+        }
+        in_profile && $0 ~ "</profile>" { exit }
+    ' "$pom"
+}
+
+it_infer_search_engine() {
+    local target_dir="$1"
+
+    if it_dir_has_files "$target_dir/opensearch0/logs"; then
+        echo "opensearch"
+    elif it_dir_has_files "$target_dir/elasticsearch0/logs"; then
+        echo "elasticsearch"
+    elif [ -f "$target_dir/opensearch-port.properties" ]; then
+        echo "opensearch"
+    elif [ -f "$target_dir/elasticsearch-port.properties" ]; then
+        echo "elasticsearch"
+    else
+        echo "unknown"
+    fi
+}
+
+it_run_label() {
+    basename "$1"
+}
+
+it_read_properties_field() {
+    local file="$1"
+    local field="$2"
+
+    if [ ! -f "$file" ]; then
+        echo ""
+        return
+    fi
+    grep -m1 "^${field}=" "$file" 2>/dev/null | cut -d= -f2-
+}
+
+it_read_run_summary_field() {
+    it_read_properties_field "$1/$IT_RUN_SUMMARY" "$2"
+}
+
+it_validate_run_dir() {
+    local dir="$1"
+
+    if [ ! -d "$dir" ]; then
+        ui_error "Not a directory: $dir"
+        exit 1
+    fi
+    if [ ! -f "$dir/$IT_TEST_RESULTS" ]; then
+        ui_error "Missing $IT_TEST_RESULTS in $dir (run archive-it-run.sh 
first)"
+        exit 1
+    fi
+}
+
+it_find_archive_run_dirs() {
+    find "$IT_ARCHIVES_DIR" -maxdepth 1 -type d -name "$IT_RUN_DIR_GLOB" 
2>/dev/null | sort -r
+}
+
+it_collect_comparable_run_dirs() {
+    local max_count="$1"
+    local -a dirs=()
+    local dir
+
+    while IFS= read -r dir; do
+        [ -n "$dir" ] || continue
+        [ -f "$dir/$IT_TEST_RESULTS" ] || continue
+        dirs+=("$dir")
+        if [ "$max_count" -gt 0 ] && [ "${#dirs[@]}" -ge "$max_count" ]; then
+            break
+        fi
+    done < <(it_find_archive_run_dirs)
+
+    printf '%s\n' "${dirs[@]}"
+}
+
+# Newest N captures that have test-results.tsv, returned oldest → newest (for 
compare).
+it_find_comparable_run_dirs() {
+    local count="$1"
+    local -a dirs=()
+    local i
+
+    while IFS= read -r dir; do
+        [ -n "$dir" ] && dirs+=("$dir")
+    done < <(it_collect_comparable_run_dirs "$count")
+
+    for ((i = ${#dirs[@]} - 1; i >= 0; i--)); do
+        printf '%s\n' "${dirs[i]}"
+    done
+}
+
+it_count_comparable_runs() {
+    it_collect_comparable_run_dirs 0 | wc -l | tr -d ' '
+}
+
+it_read_comparison_total() {
+    local value
+    value="$(it_read_properties_field "$1" "$2")"
+    if [ -z "$value" ]; then
+        echo "0"
+    else
+        echo "$value"
+    fi
+}
+
+it_append_run_summary_excerpt() {
+    local run_dir="$1"
+
+    if [ ! -f "$run_dir/$IT_RUN_SUMMARY" ]; then
+        return
+    fi
+    grep -E "$IT_RUN_SUMMARY_EXCERPT_FIELDS" "$run_dir/$IT_RUN_SUMMARY" 
2>/dev/null || true
+}
+
+it_failsafe_testcase_awk() {
+    cat <<'AWK'
+function attr(line, key,    pos, rest) {
+    pos = index(line, key "=\"")
+    if (pos == 0) return ""
+    rest = substr(line, pos + length(key) + 2)
+    sub(/".*/, "", rest)
+    return rest
+}
+/<testcase / {
+    current = attr($0, "classname") "\t" attr($0, "name")
+    if (current == "\t") current = ""
+    if (current != "") {
+        status[current] = "PASS"
+        elapsed[current] = attr($0, "time")
+    }
+}
+current != "" && /<failure/ { status[current] = "FAIL" }
+current != "" && /<error/   { status[current] = "ERROR" }
+current != "" && /<skipped/ { status[current] = "SKIP" }
+AWK
+}
+
+it_write_test_results_from_xml() {
+    local xml="$1"
+    local tsv="$2"
+    local failed="${3:-}"
+
+    awk "$(it_failsafe_testcase_awk)
+    END {
+        print \"test_class\ttest_method\tstatus\telapsed_s\"
+        for (t in status) {
+            split(t, parts, \"\\t\")
+            print parts[1] \"\\t\" parts[2] \"\\t\" status[t] \"\\t\" 
elapsed[t]
+        }
+    }" "$xml" | sort -t $'\t' -k1,1 -k2,2 > "$tsv"
+
+    if [ -n "$failed" ]; then
+        awk -F '\t' '$3 == "FAIL" || $3 == "ERROR" { print $1 "." $2 }' "$tsv" 
>> "$failed"
+    fi
+}
+
+it_test_outcomes_fingerprint_from_xml() {
+    local xml="$1"
+
+    awk "$(it_failsafe_testcase_awk)
+    END {
+        for (t in status) {
+            split(t, parts, \"\\t\")
+            print parts[1] \"\\t\" parts[2] \"\\t\" status[t]
+        }
+    }" "$xml" | sort -t $'\t' -k1,1 -k2,2 | it_hash_lines
+}
+
+it_summarize_run_dir() {
+    local run_dir="$1"
+    local label failures errors
+
+    label="$(it_run_label "$run_dir")"
+    failures="$(it_read_run_summary_field "$run_dir" tests.failures)"
+    errors="$(it_read_run_summary_field "$run_dir" tests.errors)"
+    failures="${failures:-?}"
+    errors="${errors:-?}"
+    printf '%s (failures=%s errors=%s)' "$label" "$failures" "$errors"
+}
+
+it_hash_lines() {
+    shasum -a 256 2>/dev/null | awk '{print $1}'
+}

Review Comment:
   `it_hash_lines` relies on `shasum`, which is not guaranteed to exist on all 
Linux distributions. Since these tools are intended for developers across 
platforms, add a fallback to `sha256sum` (coreutils) (and optionally `openssl`).



##########
itests/it-run-lib.sh:
##########
@@ -0,0 +1,442 @@
+# Shared helpers for IT run scripts (sourced, not executed).
+

Review Comment:
   This new shell library is missing the standard ASF license header comment 
block that other scripts in `itests/` include (e.g. `jacoco-report.sh`).



##########
itests/llm-it-run-analysis-guide.md:
##########
@@ -0,0 +1,109 @@
+# LLM analysis guide — Apache Unomi integration test archive

Review Comment:
   This new documentation file is missing the standard ASF license header (the 
existing `itests/README.md` includes it). Apache projects typically require 
headers on newly added source/docs files too.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] UNOMI-944: Add IT developer tooling for run archival, cross-run comparison and live Karaf inspection [unomi]

Reply via email to