This is an automated email from the ASF dual-hosted git repository. sergehuber pushed a commit to branch UNOMI-944-it-dev-tooling in repository https://gitbox.apache.org/repos/asf/unomi.git
commit 1ceb21aa99602028a90fd3f2bcfc23cc6de257e8 Author: Serge Huber <[email protected]> AuthorDate: Sun Jun 7 15:11:16 2026 +0200 UNOMI-944: Add IT developer tooling for run archival, cross-run comparison and live Karaf inspection --- .gitignore | 1 + build.sh | 62 ++- itests/README.md | 598 +++++++++++++++++++--------- itests/archive-it-run.sh | 520 +++++++++++++++++++++++++ itests/compare-it-runs.sh | 84 ++++ itests/it-run-bootstrap.sh | 18 + itests/it-run-compare-lib.sh | 301 +++++++++++++++ itests/it-run-context-lib.sh | 139 +++++++ itests/it-run-karaf-lib.sh | 748 ++++++++++++++++++++++++++++++++++++ itests/it-run-lib.sh | 442 +++++++++++++++++++++ itests/it-run-ui.sh | 255 ++++++++++++ itests/kt.sh | 259 +++++++++++++ itests/llm-it-run-analysis-guide.md | 109 ++++++ 13 files changed, 3347 insertions(+), 189 deletions(-) diff --git a/.gitignore b/.gitignore index 84df9f5ef..10feb0dc7 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,5 @@ dependency_tree.txt /.cursor/ /.local-notes/ itests/snapshots_repository/ +itests/archives/ .env.local diff --git a/build.sh b/build.sh index 9185fb24b..1caa3fd70 100755 --- a/build.sh +++ b/build.sh @@ -407,6 +407,9 @@ EOF exit 1 } +# Preserve the original invocation for IT run tracing (archive-it-run.sh reads this). 
+BUILD_SCRIPT_INVOCATION=("$0" "$@") + # Parse command line arguments while [ "$1" != "" ]; do case $1 in @@ -974,6 +977,52 @@ start_timer total_steps=2 current_step=0 +write_it_run_trace_start() { + local trace_file="$DIRNAME/itests/target/it-run-trace.properties" + local invocation + mkdir -p "$(dirname "$trace_file")" + invocation="$(printf '%q ' "${BUILD_SCRIPT_INVOCATION[@]}")" + { + echo "# IT run trace (written by build.sh after clean, before install)" + echo "trace.phase=started" + echo "trace.started=$(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "build.invocation=$invocation" + echo "maven.clean.command=$MVN_CMD clean $MVN_OPTS" + echo "maven.install.command=$MVN_CMD install $MVN_OPTS" + echo "use.opensearch=$USE_OPENSEARCH" + echo "search.engine=$([ "$USE_OPENSEARCH" = true ] && echo opensearch || echo elasticsearch)" + echo "search.heap=${SEARCH_HEAP:-}" + echo "karaf.heap=${KARAF_HEAP:-}" + echo "single.test=${SINGLE_TEST:-}" + echo "it.debug=$IT_DEBUG" + echo "it.debug.port=${IT_DEBUG_PORT:-}" + echo "it.debug.suspend=$IT_DEBUG_SUSPEND" + echo "skip.migration.tests=$SKIP_MIGRATION_TESTS" + echo "it.keep.container=$KEEP_CONTAINER" + echo "maven.debug=$MAVEN_DEBUG" + echo "maven.offline=$MAVEN_OFFLINE" + echo "maven.quiet=$MAVEN_QUIET" + echo "maven.opts=${MAVEN_OPTS:-}" + echo "maven.extra.opts=${MAVEN_EXTRA_OPTS:-}" + echo "profiles=${PROFILES:-}" + echo "host=$(hostname 2>/dev/null || echo unknown)" + echo "uname=$(uname -a 2>/dev/null || echo unknown)" + } > "$trace_file" +} + +finalize_it_run_trace() { + local exit_code="$1" + local trace_file="$DIRNAME/itests/target/it-run-trace.properties" + if [ ! -f "$trace_file" ]; then + return + fi + { + echo "trace.phase=completed" + echo "trace.completed=$(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "maven.exit.code=$exit_code" + } >> "$trace_file" +} + print_progress $((++current_step)) $total_steps "Cleaning previous build..." 
if [ "$HAS_COLORS" -eq 1 ]; then echo -e "${GRAY}Running: $MVN_CMD clean $MVN_OPTS${NC}" @@ -985,16 +1034,25 @@ $MVN_CMD clean $MVN_OPTS || { exit 1 } +if [ "$RUN_INTEGRATION_TESTS" = true ]; then + write_it_run_trace_start +fi + print_progress $((++current_step)) $total_steps "Compiling and installing artifacts..." if [ "$HAS_COLORS" -eq 1 ]; then echo -e "${GRAY}Running: $MVN_CMD install $MVN_OPTS${NC}" else echo "Running: $MVN_CMD install $MVN_OPTS" fi -$MVN_CMD install $MVN_OPTS || { +$MVN_CMD install $MVN_OPTS +INSTALL_EXIT=$? +if [ "$RUN_INTEGRATION_TESTS" = true ]; then + finalize_it_run_trace "$INSTALL_EXIT" +fi +if [ "$INSTALL_EXIT" -ne 0 ]; then print_status "error" "Maven install failed" exit 1 -} +fi print_status "success" "Build completed in $(get_elapsed_time)" diff --git a/itests/README.md b/itests/README.md index 1920738f8..b2500f8fb 100644 --- a/itests/README.md +++ b/itests/README.md @@ -15,170 +15,390 @@ ~ limitations under the License. --> -Apache Unomi Integration tests +Apache Unomi Integration Tests ================================= -## Information -You will likely run into situation where you need to wait for the execution of your test. -To do so please avoid long Thread.sleep(10000) it tend to make the test unstable, prefer a shorter sleep that you will repeat. -e.g: +## Overview + +The integration tests verify Apache Unomi end-to-end: REST and GraphQL APIs, profile +resolution, segmentation, rules, event processing, JSON schema validation, authentication, +and data migration. + +**How they work** + +- **[Pax Exam](https://ops4j1.jira.com/wiki/spaces/PAXEXAM4/overview)** provisions and + starts a real Karaf/OSGi container with all Unomi bundles installed. +- **Docker** starts a real Elasticsearch or OpenSearch instance (managed by the + `docker-maven-plugin`). +- **Maven Failsafe** runs a single entry point — `AllITs` — which aggregates all test + classes. 
Each test class extends `BaseIT`, which handles Karaf startup, OSGi service + injection, and common test utilities. + +A full IT run typically takes 20–30 minutes. The Karaf instance is created fresh for each +run under `itests/target/exam/` with a UUID directory name. + +**Available tools in this directory** + +| Script | Purpose | +|--------|---------| +| `build.sh` (project root) | Recommended way to run and configure IT runs | +| `kt.sh` | Live Karaf inspection during a run (log, tail, grep, debug) | +| `archive-it-run.sh` | Capture IT run artifacts before the next build wipes them | +| `compare-it-runs.sh` | Diff multiple captures to classify flaky vs systematic failures | +| `jacoco-report.sh` | Generate a JaCoCo coverage report after a run | + +--- + +## Writing Integration Tests + +### Class structure + +Every IT class must extend `BaseIT` and carry two Pax Exam annotations: + ```java -boolean isDone = false; -while (!isDone) { - importConfiguration = importConfigurationService.load(itemId); - if (importConfiguration != null && importConfiguration.getStatus() != null) { - isDone = importConfiguration.getStatus().equals(RouterConstants.CONFIG_STATUS_COMPLETE_SUCCESS); +@RunWith(PaxExam.class) +@ExamReactorStrategy(PerSuite.class) // one Karaf container shared across the whole suite +public class MyFeatureIT extends BaseIT { + + @Before + public void setUp() throws InterruptedException { + // create scopes, load fixtures, etc. + } + + @After + public void tearDown() { + // delete everything your test created + } + + @Test + public void testSomething() throws Exception { + // ... } - Thread.sleep(1000); } ``` -**NEVER** create dependencies between your test, even in the same class, the execution order is not guaranteed therefore you may not -have what you expect and it could work fine on your machine but not on others. +`BaseIT.waitForStartup()` runs before your `@Before` and handles Karaf startup, service +injection, and log-checker initialisation. 
Do not call it yourself. + +### Available services + +`BaseIT` injects all core Unomi services as protected fields. The most commonly used: + +| Field | Service | +|-------|---------| +| `profileService` | Load, save, and query profiles | +| `eventService` | Send and query events | +| `segmentService` | Evaluate and manage segments | +| `rulesService` | Create, update, and refresh rules | +| `definitionsService` | Condition and action type definitions | +| `persistenceService` | Low-level index refresh and direct queries | +| `schemaService` | JSON schema registration and validation | +| `scopeService` | Scope lifecycle | +| `privacyService` | Anonymisation and deletion | + +For any service not listed, use `getOsgiService(MyService.class, 60000)`. + +### Polling helpers + +Never use raw `Thread.sleep`. Use the polling helpers from `BaseIT`: + +```java +// Wait until a condition becomes true — fails after retries +Profile p = keepTrying("Profile not found", + () -> profileService.load(profileId), + Objects::nonNull, + DEFAULT_TRYING_TIMEOUT, // 1000 ms between attempts + DEFAULT_TRYING_TRIES); // 10 retries max + +// Wait until something is deleted / returns null +waitForNullValue("Profile still exists", + () -> profileService.load(profileId), + DEFAULT_TRYING_TIMEOUT, DEFAULT_TRYING_TRIES); + +// Assert a condition stays true throughout the period (stability check) +shouldBeTrueUntilEnd("Segment membership changed unexpectedly", + () -> segmentService.isProfileInSegment(profile, segmentId), + result -> result, + DEFAULT_TRYING_TIMEOUT, DEFAULT_SHOULDBETRUE_TRIES); // 5 retries + +// Convenience: wait for a specific profile property value +waitForProfileProperty(profileId, "email", "[email protected]"); +``` + +### HTTP helpers -If possible clean what your test created at the end of its execution or at the very least make sure to use unique IDs +`BaseIT` provides thin wrappers around Apache HttpClient, pre-configured with auth: -When you need a service from Unomi to 
execute your test, you can add it to the BaseIT code: ```java -@Before -public void waitForStartup() throws InterruptedException { - // ... - // init unomi services that are available once unomi:start have been called - persistenceService=getOsgiService(PersistenceService.class, 600000); +// GET — deserialises JSON response directly +Profile profile = get("/cxs/profiles/" + profileId, Profile.class); + +// POST — body is a classpath resource path (under src/test/resources) +CloseableHttpResponse resp = post("/cxs/profiles/search", "queries/profile-search.json"); + +// POST with explicit content type +CloseableHttpResponse resp = post("/cxs/eventcollector", "events/view.json", JSON_CONTENT_TYPE); + +// DELETE +CloseableHttpResponse resp = delete("/cxs/profiles/" + profileId); + +// Full URL construction (port is wired automatically) +String url = getFullUrl("/cxs/profiles/" + profileId); +``` + +### Resource helpers + +Place JSON fixtures under `src/test/resources/` and load them with: + +```java +String json = resourceAsString("events/view-event.json"); // from bundle resources +String json = bundleResourceAsString("events/view-event.json"); // from OSGi bundle +``` + +### Log checking + +After each test, `BaseIT` scans Karaf logs for unexpected ERROR/WARN lines. If your test +intentionally triggers errors (e.g. sending invalid input), suppress the expected noise: + +```java +// Override in your class to declare expected patterns +@Override +protected LogChecker createLogChecker() { + return LogChecker.builder() + .addIgnoredSubstring("Response status code: 400") + .addIgnoredMultiPart("Schema", "not found") + .build(); } -``` -This will ensure the service is available before starting the test. 
-## Running integration tests -You can run the integration tests along with the build by doing: +// Or add patterns dynamically inside a test method +addIgnoredLogSubstring("invalid input for test"); +``` - mvn clean install -P integration-tests - -from the project's root directory +### Best practices -### Bypassing Maven Build Cache +- **Use unique IDs.** Prefix profile IDs, session IDs, and scope names with your test + class name or a UUID to avoid collisions with other tests running in the same suite. +- **Never depend on another test.** Execution order is not guaranteed, even within the + same class. Each test must create its own state. +- **Always clean up.** Delete what your test created in `@After`. At minimum use unique + IDs so leftover data does not affect other tests. +- **Force index refresh when needed.** After writes, call + `persistenceService.refresh()` or use `query.setForceRefresh(true)` before asserting + on persistence-backed data. +- **Register your class in `AllITs`.** New test classes must be added to + `AllITs.java` to be picked up by the Failsafe runner. -If you encounter issues with cached builds interfering with test execution, you can bypass the Maven Build Cache by adding the `-Dmaven.build.cache.enabled=false` parameter: +--- - mvn clean install -P integration-tests -Dmaven.build.cache.enabled=false +## Running Integration Tests -This is particularly useful when you want to ensure a completely fresh build and test execution, regardless of previous successful builds. +The recommended way is through `build.sh` from the project root: -### Search Engine Selection +```bash +# Run with Elasticsearch (default) +./build.sh --integration-tests + +# Run with OpenSearch +./build.sh --integration-tests --use-opensearch -Apache Unomi supports both ElasticSearch and OpenSearch as search engine backends. 
The integration tests can be configured to run against either engine: +# Run a single test class +./build.sh --integration-tests --single-test org.apache.unomi.itests.BasicIT + +# Run a specific test method +./build.sh --integration-tests --single-test org.apache.unomi.itests.ContextServletIT#testContextEndpointAuthentication +``` + +You can also invoke Maven directly from the project root: ```bash -# Run with ElasticSearch (default) +# Run with Elasticsearch (default) mvn clean install -P integration-tests # Run with OpenSearch -# Activate via property only. Do not pass -P opensearch or !elasticsearch; -# the property alone handles activation/deactivation. +# Use the property only — do not pass -P opensearch or !elasticsearch mvn clean install -P integration-tests -Duse.opensearch=true + +# Run a single test class +mvn clean install -P integration-tests -Dit.test=org.apache.unomi.itests.BasicIT + +# Run a specific test method +mvn clean install -P integration-tests -Dit.test=org.apache.unomi.itests.ContextServletIT#testContextEndpointAuthentication + +# Run all methods matching a pattern +mvn clean install -P integration-tests -Dit.test=org.apache.unomi.itests.ContextServletIT#test*Authentication* +``` + +See the [Maven Failsafe plugin docs](https://maven.apache.org/surefire/maven-failsafe-plugin/examples/single-test.html) for more filtering options. + +### Bypassing the Maven Build Cache + +If a cached build is interfering with test execution, use `--purge-maven-cache` to wipe +the local Maven cache before building: + +```bash +./build.sh --integration-tests --purge-maven-cache ``` -## Debugging integration tests +This removes `~/.m2/build-cache`, `~/.m2/dependency-cache`, and +`~/.m2/dependency-cache_v2`. It cannot be combined with `--offline`. + +--- + +## Debugging Integration Tests + +### Attaching a remote debugger to the test JVM -If you want to run the tests with a debugger, you can use the `it.karaf.debug` system property. 
-Here's an example: +Use `--it-debug` to enable remote debugging of the test code running inside Karaf. +Add `--it-debug-suspend` to pause Karaf until your debugger connects: - cd itests - mvn clean install -Dit.karaf.debug=hold:true - -The `hold:true` will tell the JVM to pause for you to connect a debugger. You can simply connect a remote debugger on $ -port 5006 to debug the integration tests. +```bash +# Via build.sh — pause until debugger connects (port 5006) +./build.sh --integration-tests --it-debug --it-debug-suspend + +# Custom port +./build.sh --integration-tests --it-debug --it-debug-suspend --it-debug-port 5008 -Here are the parameters supported by the `it.karaf.debug` property: +# Via Maven (from project root) +mvn clean install -P integration-tests -Dit.karaf.debug=hold:true,port:5006 +``` - hold:true - forces a wait for a remote debugger to connect - hold:false - continues even with no remote debugger connected - port:XXXX allows to configure the binding port to XXXX - -You can combine both parameters using a comma as a separator, as in the following example: +Connect your IDE remote debugger to `localhost:5006` (or whichever port you chose). 
- mvn clean install -Dit.karaf.debug=hold:true,port=5006 - -### Karaf Resolver Debug Logging +Supported `it.karaf.debug` parameters (comma-separated): -To enable debug logging for the Karaf Resolver and Karaf features service during integration tests, you can use the `it.unomi.resolver.debug` system property: +| Parameter | `build.sh` equivalent | Description | +|--------------|-------------------------|-----------------------------------------| +| `hold:true` | `--it-debug-suspend` | Pause until a debugger connects | +| `hold:false` | `--it-debug` (default) | Enable debug without pausing | +| `port:XXXX` | `--it-debug-port XXXX` | Change the debug port (default: 5006) | - mvn clean install -P integration-tests -Dit.unomi.resolver.debug=true +### Karaf Resolver debug logging -Alternatively, you can use the build scripts: +To diagnose bundle refresh or feature installation issues: - # Using build.sh (Unix/Linux/macOS) - ./build.sh --integration-tests --resolver-debug +```bash +# Via build.sh +./build.sh --integration-tests --resolver-debug - # Using build.ps1 (Windows PowerShell) - .\build.ps1 -IntegrationTests -ResolverDebug +# Via Maven +mvn clean install -P integration-tests -Dit.unomi.resolver.debug=true +``` + +This enables DEBUG logging for `org.osgi.service.resolver`, `org.apache.karaf.features`, +`org.apache.karaf.resolver`, `org.osgi.framework`, and `org.osgi.service.packageadmin`. + +### Live Karaf inspection with `kt.sh` + +During a running test, the Karaf instance lives under `target/exam/` with a UUID directory +name. 
`kt.sh` locates it automatically and gives you convenient shortcuts: + +```bash +cd itests +./kt.sh tail # follow the log in real time +./kt.sh grep ERROR # search for errors +./kt.sh log # open the full log in less +./kt.sh dir # print the path to the Karaf directory +./kt.sh pushd # cd into the Karaf directory (use popd to return) +./kt.sh start # start the Karaf instance +./kt.sh debug # start Karaf in debug mode (port 5005) +./kt.sh console # start Karaf in foreground console mode +./kt.sh stop # stop the running Karaf instance +``` -This enables DEBUG logging for the following components: -- `org.osgi.service.resolver` (OSGi resolver) -- `org.apache.karaf.features` (Karaf features service) -- `org.apache.karaf.resolver` (Karaf resolver) -- `org.osgi.framework` (OSGi framework) -- `org.osgi.service.packageadmin` (Package admin) +All commands have single-letter aliases (`t`, `g`, `l`, `i`, `p`, `s`, `d`, `c`, `x`). +Run `./kt.sh help` for the full list. -This is particularly useful when debugging bundle refresh issues or understanding why bundles are being refreshed during feature installation. - -## Running a single test +--- -If you want to run a single test or single methods, following the instructions given here: -https://maven.apache.org/surefire/maven-failsafe-plugin/examples/single-test.html +## Analyzing IT Run Failures -Here's an example: +### Build trace + +When you run integration tests via `build.sh`, a file is written to +`itests/target/it-run-trace.properties` capturing the exact Maven command, search engine, +heap sizes, flags, and timestamps. This lets you reproduce a reported failure precisely. 
+ +### Capturing a run for post-mortem analysis + +After a test run, call `archive-it-run.sh` to snapshot the artifacts before the next build +wipes `itests/target/`: + +```bash +cd itests +./archive-it-run.sh +./archive-it-run.sh -m "Heavy swap, 2 failures in GraphQLListIT" # with an operator note +./archive-it-run.sh --full-karaf # include complete Karaf log +``` + +Each capture is saved under `itests/archives/it-run-YYYYMMDD-HHMMSS/` and includes: + +- Failsafe and surefire reports +- Karaf log tail and a filtered error/warning extract (unexpected errors only) +- Engine logs (Elasticsearch / OpenSearch) +- Build trace and run context +- A `test-results.tsv` with one row per test (for cross-run comparison) + +The archive also strips out expected Karaf noise — errors that tests deliberately trigger +(bad schemas, auth probes, invalid input) — so only genuine unexpected errors stand out. + +The `itests/archives/` directory is gitignored. + +### Comparing runs to distinguish flaky vs systematic failures + +Once you have two or more captures, use `compare-it-runs.sh` to diff them: + +```bash +cd itests +./compare-it-runs.sh --last 3 +./compare-it-runs.sh archives/it-run-20260601-120000 archives/it-run-20260602-120000 +``` - mvn clean install -Dit.karaf.debug=hold:true -Dit.test=org.apache.unomi.itests.BasicIT +Each test is classified as consistently failing, consistently passing, or flaky across the +selected runs. 
-To run a specific test method within a test class, you can use the # symbol followed by the method name: +--- - mvn clean install -Dit.test=org.apache.unomi.itests.ContextServletIT#testContextEndpointAuthentication +## Coverage Report -You can also use patterns to run multiple methods that match a pattern: +To generate a JaCoCo coverage report after running integration tests: - mvn clean install -Dit.test=org.apache.unomi.itests.ContextServletIT#test*Authentication* +```bash +cd itests +./jacoco-report.sh +``` -## Migration tests +The report is generated under `itests/target/site/jacoco/`. -Migration can now be tested, by reusing an ElasticSearch snapshot. -The snapshot should be from a Unomi version where you want to start the migration from. +--- -The snapshot is copied to the /target folder using a maven ant plugin: +## Migration Tests - <plugin> - <artifactId>maven-antrun-plugin</artifactId> - <version>1.8</version> - <executions> - <execution> - <phase>generate-resources</phase> - <configuration> - <tasks> - <unzip src="${project.basedir}/src/test/resources/migration/snapshots_repository.zip" dest="${project.build.directory}" /> - </tasks> - </configuration> - <goals> - <goal>run</goal> - </goals> - </execution> - </executions> - </plugin> +Migration can be tested by restoring an Elasticsearch snapshot from an older Unomi version +and running the migration command. 
-Also the ElasticSearch maven plugin is configured to allow this snapshot repository using conf: +The snapshot is unpacked by Maven during the build (via `maven-antrun-plugin`) and +Elasticsearch is configured to allow the snapshot repository: - <path.repo>${project.build.directory}/snapshots_repository</path.repo> +```xml +<path.repo>${project.build.directory}/snapshots_repository</path.repo> +``` -Now that migration accept configuration file we can provide it, this allows to avoid the migration process to prompt questions (in BaseIT configuration): +Provide a migration config file to avoid interactive prompts (in `BaseIT`): - replaceConfigurationFile("etc/org.apache.unomi.migration.cfg", new File("src/test/resources/migration/org.apache.unomi.migration.cfg")), +```java +replaceConfigurationFile("etc/org.apache.unomi.migration.cfg", + new File("src/test/resources/migration/org.apache.unomi.migration.cfg")) +``` -The config should contain all the required prop for the migration you want to do, example: +Example config: - esAddress = http://localhost:9400 - httpClient.trustAllCertificates = true - indexPrefix = context +```properties +esAddress = http://localhost:9400 +httpClient.trustAllCertificates = true +indexPrefix = context +``` -Then in the first Test of the suite you can restore the Snapshot and run the migration cmd, like this: +Restore the snapshot and trigger migration in the first test of the suite: ```java public class Migrate16xTo200IT extends BaseIT { @@ -186,132 +406,136 @@ public class Migrate16xTo200IT extends BaseIT { @Override @Before public void waitForStartup() throws InterruptedException { - - // Restore snapshot from 1.6.x try (CloseableHttpClient httpClient = HttpUtils.initHttpClient(true)) { // Create snapshot repo - HttpUtils.executePutRequest(httpClient, "http://localhost:9400/_snapshot/snapshots_repository/", resourceAsString("migration/create_snapshots_repository.json"), null); - // Get snapshot, insure it exists - String snapshot = 
HttpUtils.executeGetRequest(httpClient, "http://localhost:9400/_snapshot/snapshots_repository/snapshot_3", null); + HttpUtils.executePutRequest(httpClient, + "http://localhost:9400/_snapshot/snapshots_repository/", + resourceAsString("migration/create_snapshots_repository.json"), null); + // Verify snapshot exists + String snapshot = HttpUtils.executeGetRequest(httpClient, + "http://localhost:9400/_snapshot/snapshots_repository/snapshot_3", null); if (snapshot == null || !snapshot.contains("snapshot_3")) { throw new RuntimeException("Unable to retrieve 1.6.x snapshot for ES restore"); } // Restore the snapshot - HttpUtils.executePostRequest(httpClient, "http://localhost:9400/_snapshot/snapshots_repository/snapshot_3/_restore?wait_for_completion=true", "{}", null); + HttpUtils.executePostRequest(httpClient, + "http://localhost:9400/_snapshot/snapshots_repository/snapshot_3/_restore?wait_for_completion=true", + "{}", null); } catch (IOException e) { throw new RuntimeException(e); } - - // Do migrate the data set executeCommand("unomi:migrate 1.6.0 true"); - // Call super for starting Unomi and wait for the complete startup super.waitForStartup(); } @After public void cleanup() throws InterruptedException { - // Do some cleanup for next tests + // clean up data created by this test } @Test public void checkMigratedData() throws Exception { - // call Unomi services to check the migrated data is correct. + // call Unomi services to verify the migrated data is correct } } -``` - -### How to update a migration test ElasticSearch Snapshot ? - -In the following example we want to modify the snapshot: `snapshot_3`. -This snapshot has been done on Unomi 1.6.x using ElasticSearch 7.11.0. -So we will set up locally those servers in the exact same versions. -(For now just download them and do not start them yet.) 
- -To ease the migration, you can run the docker image of ElasticSearch 7.11.0 with the following command: - - docker run -p 9200:9200 -e path.repo="/tmp/snapshots_repository" -e discovery.type=single-node docker.elastic.co/elasticsearch/elasticsearch:7.11.0 - -Note that the path.repo is set to `/tmp/snapshots_repository` so you can use this path to store the snapshot repository. - -First we need to extract the zip of the snapshot repository from the test resources: - - /src/test/resources/migration/snapshots_repository.zip - -> If you use docker, you can copy the zip file to the docker container using the following command: -> docker cp src/test/resources/migration/snapshots_repository.zip <container_id>:/tmp/snapshots_repository.zip - -Then unzip it to the path you want to use as snapshot repository. - -In my case I unzip it to: - - /servers/elasticsearch-7.11.0/ +``` -> For docker unzip in the `/tmp` folder. +### Updating a migration snapshot -So I have the following folders structure: +To modify an existing snapshot (e.g. `snapshot_3`, taken on Unomi 1.6.x / Elasticsearch 7.11.0): - /servers/elasticsearch-7.11.0/snapshots_repository/snapshots +1. Start Elasticsearch 7.11.0 with the snapshot repository path configured: -Now we need to configure our ElasticSearch server to allow this path as repo, edit the `elasticsearch.yml` to add this: + **Docker (recommended):** + ```bash + docker run -p 9200:9200 \ + -e path.repo="/tmp/snapshots_repository" \ + -e discovery.type=single-node \ + docker.elastic.co/elasticsearch/elasticsearch:7.11.0 + ``` + **Local install:** edit `elasticsearch.yml` and add: + ```yaml path: - repo: - - /servers/elasticsearch-7.11.0/snapshots_repository + repo: + - /path/to/snapshots_repository + ``` -> This step is not required for docker. +2. Extract and copy the snapshot zip: -Start ElasticSearch server. 
-Now we have to add the snapshot repository, do the following request on your ElasticSearch instance: + **Docker:** + ```bash + docker cp src/test/resources/migration/snapshots_repository.zip \ + <container_id>:/tmp/snapshots_repository.zip + # then inside the container: + unzip /tmp/snapshots_repository.zip -d /tmp + ``` - PUT /_snapshot/snapshots_repository/ - { - "type": "fs", - "settings": { - "location": "snapshots" - } - } + **Local install:** unzip `src/test/resources/migration/snapshots_repository.zip` + to your configured `path.repo` directory. -Now we need to restore the snapshot we want to modify, -but first let's try to see if the snapshot with the id `snapshot_3` correctly exists: +3. Register the snapshot repository: - GET /_snapshot/snapshots_repository/snapshot_3 + ``` + PUT /_snapshot/snapshots_repository/ + { "type": "fs", "settings": { "location": "snapshots" } } + ``` -If the snapshot exists we can restore it: +4. Verify and restore the snapshot: + ``` + GET /_snapshot/snapshots_repository/snapshot_3 POST /_snapshot/snapshots_repository/snapshot_3/_restore?wait_for_completion=true {} + ``` -At the end of the previous request ElasticSearch should be ready and our Unomi snapshot is restored to version `1.6.x`. -Now make sure your Unomi server is correctly configured to connect to your running ElasticSearch, then start the Unomi server. -In my case it's Unomi version 1.6.0. +5. Configure Unomi to connect to your running Elasticsearch instance, then start the + matching Unomi version. Once it is up, add or modify the data you want captured in + the new snapshot: + - create new events + - create profiles with new properties to be migrated + - create rules, segments, etc. -Once Unomi started you can perform all the operations you want to be able to add the required data to the next snapshot, like: -- creating new events -- creating new profiles with new data to be migrated -- create rules/segments etc ... 
-- anything you want to be part of the new snapshot. + **Important:** add to the existing data — do not remove existing items, as they are + likely relied upon by current migration tests. -(NOTE: that it is important to add new data to the existing snapshot, but try to not removing things, -they are probably used by the actual migration tests already.) - -Once you data updated we need to recreate the snapshot, first we delete the old snapshot: +6. Delete and recreate the snapshot (check the Elasticsearch logs to confirm it completes), then unregister the snapshot repository: + ``` DELETE /_snapshot/snapshots_repository/snapshot_3 + PUT /_snapshot/snapshots_repository/snapshot_3 + DELETE /_snapshot/snapshots_repository + ``` -Then we recreate it: +7. Zip and replace in test resources: - PUT /_snapshot/snapshots_repository/snapshot_3 + **Docker:** + ```bash + # inside the container + zip -r /tmp/snapshots_repository.zip /tmp/snapshots_repository + # copy back to host + docker cp <container_id>:/tmp/snapshots_repository.zip \ + src/test/resources/migration/snapshots_repository.zip + ``` -Once the process finished (check the ElasticSearch logs to see that the snapshot is correctly created), -we need to remove the snapshot repository from our local ElasticSearch + **Local install:** + ```bash + zip -r snapshots_repository.zip /path/to/snapshots_repository + cp snapshots_repository.zip src/test/resources/migration/snapshots_repository.zip + ``` - DELETE /_snapshot/snapshots_repository +8. Update the migration test class to verify that the data you added in step 5 is + correctly migrated. 
+ +--- + +## Known Issues -And the final step is, zipping the new version of the snapshot repository and replace it in the test resources: +**OpenSearch `QueryGroupTask` warnings** - zip -r snapshots_repository.zip /servers/elasticsearch-7.11.0/snapshots_repository - cp /servers/elasticsearch-7.11.0/snapshots_repository.zip src/test/resources/migration/snapshots_repository.zip +OpenSearch test logs contain lines like: -> In case you are using docker, do zip in the container and use `docker cp` to get the zip file from the docker container. + opensearch> [WARN][o.o.w.QueryGroupTask] QueryGroup _id can't be null ... -Now you can modify the migration test class to test that your added data in 1.6.x is correctly migrated in 2.0.0 +This is a known bug in OpenSearch 2.18 with no functional impact. +Tracked at: https://github.com/opensearch-project/OpenSearch/issues/16874 diff --git a/itests/archive-it-run.sh b/itests/archive-it-run.sh new file mode 100755 index 000000000..da38db71b --- /dev/null +++ b/itests/archive-it-run.sh @@ -0,0 +1,520 @@ +#!/bin/bash +################################################################################ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +################################################################################ +# +# Archive integration-test run artifacts for later analysis (e.g. LLM review). +# +# Invoke from anywhere: +# ./itests/archive-it-run.sh +# ./itests/archive-it-run.sh -m "Heavy macOS swap during run" +# ./itests/archive-it-run.sh --tar -o /tmp/my-it-run.tar.gz +# ./itests/compare-it-runs.sh --last 3 +# ./itests/archive-it-run.sh --full-karaf # include full karaf.log (can be large) +# +# Default output: unexploded directory under archives/it-run-YYYYMMDD-HHMMSS/ +# Use --tar or -o path ending in .tar.gz to produce a tarball instead. +# +# Shared modules: bootstrap (ui · lib · compare-lib) + karaf-lib + context-lib +# (auto-disabled when NO_COLOR, CI, or non-TTY). +# +# Included (high signal): +# - failsafe-reports/ IT results (XML, txt, dumpstream, summary) +# - surefire-reports/ module unit tests run before ITs (if present) +# - exam/.../karaf.log[.N] full segment(s) only with --full-karaf (rollover: karaf.log.1, .2, …) +# - exam/.../karaf-log-segments.txt rollover segment order (oldest → newest) +# - exam/.../karaf-triage-summary.txt LLM entry point for Karaf log analysis +# - exam/.../karaf-failure-correlation.log excerpts anchored on failing tests +# - exam/.../karaf-exception-index.tsv top recurring exceptions/errors +# - exam/.../karaf-recent.log full merged log (if ≤20k lines) or tail +# - exam/.../karaf-errors-warnings.log merged ERROR/WARN blocks + full stack traces +# - elasticsearch-port.properties / opensearch-port.properties (engine + ports) +# - elasticsearch0/logs, opensearch0/logs (Docker engine logs, if present) +# - llm-it-run-analysis-guide.md, expected-karaf-log-patterns.txt +# - exam/.../karaf-unexpected-candidates.log (errors not matching expected patterns) +# - test-results.tsv, run-summary.properties, failed-tests.txt (LLM-friendly per-run test manifest) +# - run-context.txt, run-config/it-run-trace.properties (build/Maven/options trace) +# - 
archives/runs-index.tsv (updated each capture — cross-run comparison index) +# - comparison-last-3.txt, archives/latest-comparison.txt (auto when 2+ captures exist) +# - manifest.txt +# +# Excluded (noise / huge): exam/system bundles, test-classes, jacoco, snapshots_repository +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +# shellcheck source=it-run-bootstrap.sh disable=SC1091 +source "$SCRIPT_DIR/it-run-bootstrap.sh" +# shellcheck source=it-run-karaf-lib.sh disable=SC1091 +source "$SCRIPT_DIR/it-run-karaf-lib.sh" +# shellcheck source=it-run-context-lib.sh disable=SC1091 +source "$SCRIPT_DIR/it-run-context-lib.sh" + +# --- configuration ----------------------------------------------------------- + +TARGET_DIR="target" +LLM_GUIDE="llm-it-run-analysis-guide.md" +MANIFEST_REL="manifest.txt" +RUN_TRACE_FILE="it-run-trace.properties" +RUN_CONTEXT_REL="run-context.txt" +FAILED_TESTS_REL="failed-tests.txt" + +REPORT_TREES=( + failsafe-reports + surefire-reports +) + +RUN_CONFIG_FILES=( + elasticsearch-port.properties + opensearch-port.properties + it-run-trace.properties +) + +ENGINE_LOG_TREES=( + elasticsearch0/logs + opensearch0/logs +) + +OUTPUT="" +CREATE_TAR=false +RUN_MESSAGE="" +FULL_KARAF=false +AUTO_COMPARE=true +FORCE_ARCHIVE=false +STAGING="" +STAGING_IS_TEMP=false +MANIFEST="" +PATTERNS_FILE="" +RUN_ID="" +RUN_FINGERPRINT="" +included=0 + +# --- CLI --------------------------------------------------------------------- + +usage() { + echo "Usage: $0 [-o DIR|ARCHIVE.tar.gz] [--tar] [-m MSG] [--message-file FILE] [--full-karaf] [--no-compare] [--force]" + echo " (default) Unexploded directory: archives/it-run-YYYYMMDD-HHMMSS/" + echo " --tar Write a .tar.gz instead (default name under archives/)" + echo " -o PATH Output directory, or .tar.gz / .tgz archive path" + echo " -m, --message Operator note about run context (quoted string)" + echo " --message-file Read operator note from a 
# Parse archive-it-run.sh command-line options into the script-level globals
# OUTPUT, CREATE_TAR, RUN_MESSAGE, FULL_KARAF, AUTO_COMPARE and FORCE_ARCHIVE.
#
# Arguments: the script's positional parameters ("$@").
# Options taking a value (-o, -m/--message, --message-file) call usage when
# the value is missing. -h and every unrecognized argument also call usage.
# --message-file reports through ui_error and exits 1 when the file is absent.
parse_args() {
    local flag

    while (( $# > 0 )); do
        # Consume the option itself first; any value it needs is then "$1".
        flag="$1"
        shift
        case "$flag" in
            -o)
                (( $# > 0 )) || usage
                OUTPUT="$1"
                shift
                ;;
            --tar)
                CREATE_TAR=true
                ;;
            -m | --message)
                (( $# > 0 )) || usage
                RUN_MESSAGE="$1"
                shift
                ;;
            --message-file)
                (( $# > 0 )) || usage
                if [[ ! -f "$1" ]]; then
                    ui_error "Message file not found: $1"
                    exit 1
                fi
                RUN_MESSAGE="$(cat "$1")"
                shift
                ;;
            --full-karaf)
                FULL_KARAF=true
                ;;
            --no-compare)
                AUTO_COMPARE=false
                ;;
            --force)
                FORCE_ARCHIVE=true
                ;;
            *)
                # Covers -h as well as anything unrecognized.
                usage
                ;;
        esac
    done
}
# Resolve where this capture is written and where files are staged.
#
# Globals read:    OUTPUT, CREATE_TAR, IT_ARCHIVES_DIR, TMPDIR
# Globals written: OUTPUT, CREATE_TAR, STAGING, STAGING_IS_TEMP, RUN_ID
#
# - Empty OUTPUT defaults to $IT_ARCHIVES_DIR/it-run-<timestamp>, with a
#   .tar.gz suffix when CREATE_TAR is already true.
# - An OUTPUT ending in .tar.gz or .tgz forces tarball mode.
# - Tarball mode stages into a fresh mktemp directory; directory mode stages
#   straight into OUTPUT, which is created here.
# - RUN_ID is the basename of OUTPUT without a trailing .tar.gz / .tgz.
configure_output() {
    local stamp run_name

    stamp="$(date +%Y%m%d-%H%M%S)"

    if [[ -z "$OUTPUT" ]]; then
        mkdir -p "$IT_ARCHIVES_DIR"
        case "$CREATE_TAR" in
            true) OUTPUT="$IT_ARCHIVES_DIR/it-run-$stamp.tar.gz" ;;
            *) OUTPUT="$IT_ARCHIVES_DIR/it-run-$stamp" ;;
        esac
    fi

    case "$OUTPUT" in
        *.tar.gz | *.tgz) CREATE_TAR=true ;;
    esac

    case "$CREATE_TAR" in
        true)
            STAGING="$(mktemp -d "${TMPDIR:-/tmp}/unomi-it-archive.XXXXXX")"
            STAGING_IS_TEMP=true
            ;;
        *)
            mkdir -p "$OUTPUT"
            STAGING="$OUTPUT"
            STAGING_IS_TEMP=false
            ;;
    esac

    run_name="$(basename "$OUTPUT")"
    run_name="${run_name%.tar.gz}"
    RUN_ID="${run_name%.tgz}"
}
# Write the per-run properties summary ($IT_RUN_SUMMARY) into the staging area.
#
# Arguments: $1 - path to the failsafe XML report used for the engine and
#                 heap properties.
# Globals read: IT_RUN_SUMMARY, IT_RUNS_INDEX, RUN_TRACE_FILE, FAILED_TESTS_REL,
#               RUN_ID, OUTPUT, RUN_MESSAGE, REPO_ROOT, RUN_FINGERPRINT,
#               IT_TRACE_FINGERPRINT_FIELDS, IT_RUN_FINGERPRINT_FIELD.
# Output: one key=value entry per line, then the file is registered through
#         log_staged_file and mark_included.
#
# The operator note is flattened to a single line before it is written. A note
# loaded with --message-file may span several lines, and written raw each
# extra line would appear as its own entry (a line such as "run.id=..." would
# even look like a real key). Only this summary is flattened.
write_run_summary() {
    local xml="$1"
    local summary failed_count trace summary_xml failed_list note
    summary="$(staging_path "$IT_RUN_SUMMARY")"
    trace="$(target_path "$RUN_TRACE_FILE")"
    failed_list="$(staging_path "$FAILED_TESTS_REL")"
    failed_count=0
    if [ -f "$failed_list" ]; then
        failed_count="$(wc -l < "$failed_list" | tr -d ' ')"
    fi

    # Keep the properties file strictly one entry per line.
    note="${RUN_MESSAGE//$'\r'/}"
    note="${note//$'\n'/ }"

    {
        echo "# Per-run summary for cross-run comparison (see archives/$IT_RUNS_INDEX)"
        echo "run.id=$RUN_ID"
        echo "run.captured=$(it_utc_now)"
        echo "run.path=$OUTPUT"
        if [ -n "$note" ]; then
            printf '%s\n' "operator.note=$note"
        fi
        if it_git_in_repo "$REPO_ROOT"; then
            echo "git.commit=$(it_git_head "$REPO_ROOT")"
            echo "git.branch=$(it_git_branch "$REPO_ROOT")"
        fi
        if [ -f "$trace" ]; then
            it_grep_property_lines "$trace" "$IT_TRACE_FINGERPRINT_FIELDS"
        fi
        echo "search.engine=$(it_extract_xml_property "$xml" unomi.search.engine)"
        echo "elasticsearch.heap=$(it_extract_xml_property "$xml" elasticsearch.heap)"
        echo "opensearch.heap=$(it_extract_xml_property "$xml" opensearch.heap)"
        echo "it.karaf.heap=$(it_extract_xml_property "$xml" it.karaf.heap)"
        summary_xml="$(target_path "failsafe-reports/failsafe-summary.xml")"
        if [ -f "$summary_xml" ]; then
            echo "tests.completed=$(it_failsafe_summary_count "$summary_xml" completed)"
            echo "tests.failures=$(it_failsafe_summary_count "$summary_xml" failures)"
            echo "tests.errors=$(it_failsafe_summary_count "$summary_xml" errors)"
            echo "tests.skipped=$(it_failsafe_summary_count "$summary_xml" skipped)"
        fi
        echo "failed.tests.count=$failed_count"
        if [ -n "$RUN_FINGERPRINT" ]; then
            echo "${IT_RUN_FINGERPRINT_FIELD}=$RUN_FINGERPRINT"
        fi
    } > "$summary"
    log_staged_file "$IT_RUN_SUMMARY" "Wrote $IT_RUN_SUMMARY"
    mark_included
}
xml="$(find_failsafe_xml)"; then + ui_warn "No $IT_FAILSAFE_XML found — skipping test-results.tsv" + return + fi + RUN_FINGERPRINT="$(it_ensure_run_fingerprint "$xml" "$REPO_ROOT" \ + "$(target_path "$RUN_TRACE_FILE")" "$RUN_FINGERPRINT")" + extract_test_results "$xml" + write_run_summary "$xml" +} + +# --- archive assembly -------------------------------------------------------- + +archive_report_trees() { + local name + for name in "${REPORT_TREES[@]}"; do + copy_tree "$(target_path "$name")" "$name" + done +} + +archive_run_config_files() { + local name + for name in "${RUN_CONFIG_FILES[@]}"; do + copy_file "$(target_path "$name")" "run-config/$name" + done +} + +archive_engine_log_trees() { + local name + for name in "${ENGINE_LOG_TREES[@]}"; do + copy_tree "$(target_path "$name")" "$name" + done +} + +archive_test_artifacts() { + archive_report_trees + capture_test_manifest + archive_run_config_files + archive_engine_log_trees + write_run_context +} + +prepare_llm_context() { + copy_file "$SCRIPT_DIR/$LLM_GUIDE" "$LLM_GUIDE" + generate_expected_patterns +} + +maybe_run_auto_compare() { + local count tmp latest bundled + + [ "$AUTO_COMPARE" = true ] || return 0 + if [ ! 
# Print the total size in bytes of all regular files under $STAGING.
#
# Globals read: STAGING
# Outputs: a single integer on stdout (0 when there are no files).
#
# find may split a long file list across several "wc -c" invocations, each of
# which prints its own "total" line. Taking only the last line therefore
# under-counts large captures, so the per-file counts are summed instead and
# every bare "total" line is skipped.
count_staged_bytes() {
    find "$STAGING" -type f -exec wc -c {} + 2>/dev/null \
        | awk '
            {
                name = $0
                sub(/^[ \t]*[0-9]+[ \t]+/, "", name)
                # wc summary lines carry the bare word "total" as their name;
                # real entries carry the path that find handed to wc.
                if (name != "total") {
                    sum += $1
                }
            }
            END { printf "%.0f\n", sum }
        '
}
+ ui_finish "$OUTPUT" "$(wc -c < "$OUTPUT" | tr -d ' ')" + return + fi + ui_finish_dir "$OUTPUT" "$(count_staged_bytes)" "$(find "$STAGING" -type f | wc -l | tr -d ' ')" +} + +# --- main -------------------------------------------------------------------- + +main() { + it_run_entry_init "$SCRIPT_DIR" + parse_args "$@" + + require_target_dir + reject_duplicate_archive + configure_output + ui_banner + setup_staging + + ui_phase "1/4" "Test reports, run context, and engine config" + write_archive_manifest_header + archive_test_artifacts + + ui_phase "2/4" "LLM triage guide and ignore patterns" + prepare_llm_context + + ui_phase "3/4" "Karaf log mining" + archive_karaf_logs + + require_archived_content + + ui_phase "4/4" "Finalize capture" + write_archive_manifest_footer + finalize_output +} + +main "$@" diff --git a/itests/compare-it-runs.sh b/itests/compare-it-runs.sh new file mode 100755 index 000000000..587cdb620 --- /dev/null +++ b/itests/compare-it-runs.sh @@ -0,0 +1,84 @@ +#!/bin/bash +################################################################################ +# +# Compare two or more IT run captures to classify systematic vs flaky failures. +# +# Usage: +# ./compare-it-runs.sh archives/it-run-20260101-120000 archives/it-run-20260102-120000 +# ./compare-it-runs.sh --last 3 +# ./compare-it-runs.sh --last 5 -o /tmp/comparison.txt +# +# Reads test-results.tsv from each run directory (produced by archive-it-run.sh). 
# Parse compare-it-runs.sh arguments into OUTPUT, LAST_N, QUIET and RUN_DIRS.
#
# Arguments: the script's positional parameters ("$@"). Anything that is not
#            a known option is taken as a run directory.
# Behaviour:
#   - -h/--help calls usage.
#   - --last N must be a non-negative integer; anything else is rejected with
#     exit status 1 instead of being silently ignored after a test(1) error.
#   - With N > 0 the directories reported by it_find_comparable_run_dirs are
#     appended to any explicitly listed ones.
#   - Fewer than two run directories is an error: silent exit 1 under --quiet,
#     otherwise ui_error followed by usage.
#   - --quiet without -o FILE exits 1.
parse_args() {
    local -a runs=()
    local dir

    while [ $# -gt 0 ]; do
        case "$1" in
            -h | --help) usage ;;
            -o)
                shift
                [ $# -gt 0 ] || usage
                OUTPUT="$1"
                ;;
            --last)
                shift
                [ $# -gt 0 ] || usage
                LAST_N="$1"
                ;;
            --quiet) QUIET=true ;;
            *)
                runs+=("$1")
                ;;
        esac
        shift
    done

    # Validated after the loop so that QUIET is known whatever the flag order.
    if ! [[ "$LAST_N" =~ ^[0-9]+$ ]]; then
        if [ "$QUIET" = true ]; then
            echo "ERROR: --last requires a non-negative integer, got: $LAST_N" >&2
        else
            ui_error "--last requires a non-negative integer, got: $LAST_N"
        fi
        exit 1
    fi
    # Force base 10 so a zero-padded count such as 08 is not read as octal.
    LAST_N=$((10#$LAST_N))

    if [ "$LAST_N" -gt 0 ]; then
        while IFS= read -r dir; do
            if [ -n "$dir" ]; then
                runs+=("$dir")
            fi
        done < <(it_find_comparable_run_dirs "$LAST_N")
    fi

    if [ "${#runs[@]}" -lt 2 ]; then
        if [ "$QUIET" = true ]; then
            exit 1
        fi
        ui_error "Need at least two run directories to compare."
        usage
    fi

    if [ "$QUIET" = true ] && [ -z "$OUTPUT" ]; then
        echo "ERROR: --quiet requires -o FILE" >&2
        exit 1
    fi

    RUN_DIRS=("${runs[@]}")
}
# Print the given run directories, one per line, ordered oldest first.
#
# Arguments: run directories to order.
# Sort key:  the run.captured field returned by it_read_run_summary_field, or
#            the it_run_label of the directory when that field is empty.
# Outputs:   the directories on stdout; nothing when called with no arguments
#            (previously this printed one blank entry, and expanding the empty
#            array aborts under "set -u" on bash older than 4.4).
#
# The sort runs under LC_ALL=C so the order is byte-wise and does not depend
# on the locale of the machine running the comparison.
it_compare_sort_run_dirs() {
    local run_dir sort_key
    local -a keyed=()

    for run_dir in "$@"; do
        sort_key="$(it_read_run_summary_field "$run_dir" run.captured)"
        if [ -z "$sort_key" ]; then
            sort_key="$(it_run_label "$run_dir")"
        fi
        keyed+=("${sort_key}"$'\t'"${run_dir}")
    done

    if [ "${#keyed[@]}" -eq 0 ]; then
        return 0
    fi

    printf '%s\n' "${keyed[@]}" | LC_ALL=C sort -t $'\t' -k1,1 | cut -f2-
}
# Classify every test across the compared runs and append the result sections
# (systematic / flaky / regression / fixed, plus a "## Totals" block) to the
# report file.
#
# Arguments:
#   $1   report file to append to
#   $2   space-separated run labels, oldest first; the last one is the
#        "latest" run
#   $3.. one results TSV per run, in the same order as the labels. The first
#        line of each file is skipped as a header; fields 1 and 2 joined with
#        "." form the test id and field 3 is the status (PASS/FAIL/ERROR/...).
#
# A run index is derived from the position of the current file in ARGV rather
# than from a counter bumped on each first record. A zero-byte TSV yields no
# records at all, so a plain counter would shift every later run one label to
# the left and misreport regressions and fixes.
#
# NOTE(review): "systematic" requires FAIL/ERROR in all compared runs, whereas
# the legend text says "in every run where the test appears". Confirm which
# rule is intended; the rule below is left as it was.
it_compare_write_analysis() {
    local report_file="$1"
    local labels_line="$2"
    shift 2
    local -a tsv_files=("$@")

    awk -v runs="$labels_line" '
        function test_id(key, parts) {
            split(key, parts, "\t")
            return parts[1] "." parts[2]
        }
        function get_status(r, id) {
            return ((r SUBSEP id) in status) ? status[r, id] : "MISSING"
        }
        function is_bad(st) {
            return st == "FAIL" || st == "ERROR"
        }
        function print_section(title, count, lines, i) {
            print title
            if (count == 0) {
                print "(none)"
            } else {
                for (i = 1; i <= count; i++) {
                    print lines[i]
                }
            }
            print ""
        }
        BEGIN {
            n = split(runs, run_labels, " ")
            latest = n
        }
        FNR == 1 {
            # Advance to the ARGV slot of the file being read, so that files
            # which produced no records still keep their own run index.
            do {
                run_idx++
            } while (run_idx < ARGC - 1 && ARGV[run_idx] != FILENAME)
            next
        }
        NF >= 3 {
            key = $1 "\t" $2
            if (!(key in seen)) {
                seen[key] = 1
                order[++order_count] = key
            }
            id = test_id(key, parts)
            status[run_idx, id] = $3
        }
        END {
            for (i = 1; i <= order_count; i++) {
                key = order[i]
                id = test_id(key, parts)
                bad = 0
                good = 0
                seen_runs = 0
                detail = ""
                for (r = 1; r <= n; r++) {
                    st = get_status(r, id)
                    if (st != "MISSING") {
                        seen_runs++
                    }
                    if (is_bad(st)) {
                        bad++
                    } else if (st == "PASS") {
                        good++
                    }
                    if (r > 1) {
                        detail = detail ", "
                    }
                    detail = detail run_labels[r] "=" st
                }
                latest_st = get_status(latest, id)

                if (seen_runs == n && bad == n) {
                    systematic[++systematic_count] = "- " id " (" n "/" n " runs)"
                }
                if (bad > 0 && good > 0) {
                    flaky[++flaky_count] = "- " id " (" detail ")"
                }
                if (is_bad(latest_st)) {
                    # Report only the earliest run in which the test passed.
                    for (r = 1; r < latest; r++) {
                        prev = get_status(r, id)
                        if (prev == "PASS") {
                            regression[++regression_count] = "- " id " (was PASS in " run_labels[r] ", now " latest_st " in " run_labels[latest] ")"
                            break
                        }
                    }
                }
                if (latest_st == "PASS") {
                    # Report only the earliest run in which the test failed.
                    for (r = 1; r < latest; r++) {
                        prev = get_status(r, id)
                        if (is_bad(prev)) {
                            fixed[++fixed_count] = "- " id " (was " prev " in " run_labels[r] ", now PASS)"
                            break
                        }
                    }
                }
            }

            print_section("## Systematic failures (fail/error in every run)", systematic_count, systematic)
            print_section("## Flaky tests (mixed pass and fail/error)", flaky_count, flaky)
            print_section("## Regressions (pass -> fail/error vs latest run: " run_labels[latest] ")", regression_count, regression)
            print_section("## Fixed since earlier runs (fail/error -> pass in " run_labels[latest] ")", fixed_count, fixed)

            print "## Totals"
            print "systematic=" systematic_count + 0
            print "flaky=" flaky_count + 0
            print "regression=" regression_count + 0
            print "fixed=" fixed_count + 0
        }' "${tsv_files[@]}" >> "$report_file"
}
# Compare an explicit list of run directories and write the report.
#
# Arguments: $1   report file to write
#            $2.. run directories (at least two)
# Returns:   1 when fewer than two run directories are given; otherwise the
#            status of it_compare_write_report.
#
# The directories are ordered by it_compare_sort_run_dirs before the report is
# written. The loop variable is declared local so that reading the sorted list
# no longer overwrites a caller's "dir" variable.
it_compare_runs_to_file() {
    local report_file="$1"
    shift
    local -a run_dirs=()
    local dir

    if [ "$#" -lt 2 ]; then
        return 1
    fi

    while IFS= read -r dir; do
        if [ -n "$dir" ]; then
            run_dirs+=("$dir")
        fi
    done < <(it_compare_sort_run_dirs "$@")

    it_compare_write_report "$report_file" "${run_dirs[@]}"
}
# Print each engine port file that exists under the target directory.
#
# Globals read: IT_ENGINE_PORT_FILES (array of file names, resolved through
#               target_path).
# Outputs: for every file present, a "# <name>" heading followed by the file
#          contents; missing files are skipped without comment.
context_append_engine_port_files() {
    local name resolved

    for name in "${IT_ENGINE_PORT_FILES[@]}"; do
        resolved="$(target_path "$name")"
        [[ -f "$resolved" ]] || continue
        printf '# %s\n' "$name"
        cat "$resolved"
    done
}
"snapshot.time=$(it_utc_now)" + echo "snapshot.host=$(it_hostname)" + echo "snapshot.uname=$(uname -a 2>/dev/null || echo unknown)" + if command -v uptime >/dev/null 2>&1; then + echo "snapshot.uptime=$(uptime 2>/dev/null | sed 's/^[[:space:]]*//')" + fi + if command -v df >/dev/null 2>&1; then + echo "snapshot.disk.target=$(df -h "$SCRIPT_DIR" 2>/dev/null | tail -1)" + echo "snapshot.disk.output=$(df -h "$OUTPUT" 2>/dev/null | tail -1)" + fi + if [ -n "${MAVEN_OPTS:-}" ]; then + echo "snapshot.maven.opts=$MAVEN_OPTS" + fi + if [ -n "${MAVEN_CMD_LINE_ARGS:-}" ]; then + echo "snapshot.maven.cmdline.args=$MAVEN_CMD_LINE_ARGS" + fi + if [ "$(uname -s 2>/dev/null)" = Darwin ] && command -v vm_stat >/dev/null 2>&1; then + echo "snapshot.vmstat.begin" + vm_stat | grep -E 'page size|Pages free|Pages active|Pages inactive|Pages wired|Pages occupied by compressor|Swapins|Swapouts' \ + || vm_stat | head -12 + echo "snapshot.vmstat.end" + fi + if [ "$(uname -s 2>/dev/null)" = Darwin ] && command -v memory_pressure >/dev/null 2>&1; then + echo "snapshot.memory_pressure=$(memory_pressure 2>/dev/null | head -1)" + fi +} + +context_emit_manifest_git_lines() { + if ! 
# Append the closing section of the manifest: the list of intentionally
# excluded paths, the operator note (when one was given) and the output format.
#
# Globals read: MANIFEST (file appended to), RUN_MESSAGE, CREATE_TAR, OUTPUT.
write_archive_manifest_footer() {
    local format_line

    case "$CREATE_TAR" in
        true) format_line="Output format: tar.gz -> $OUTPUT" ;;
        *) format_line="Output format: directory -> $OUTPUT" ;;
    esac

    {
        printf '\n%s\n' "Excluded (intentionally): exam/system bundles, test-classes, jacoco, snapshots_repository"
        if [[ -n "$RUN_MESSAGE" ]]; then
            printf '\n%s\n' "Operator note:"
            echo "$RUN_MESSAGE"
        fi
        echo "$format_line"
    } >> "$MANIFEST"
}
+ echo + context_append_inferred_maven_properties + fi + echo + echo "## Docker port mappings (from target)" + context_append_engine_port_files + echo + echo "## System snapshot (at archive time — not at IT start)" + context_append_system_snapshot + } > "$out" + log_staged_file "$RUN_CONTEXT_REL" "Wrote $RUN_CONTEXT_REL" + mark_included +} diff --git a/itests/it-run-karaf-lib.sh b/itests/it-run-karaf-lib.sh new file mode 100644 index 000000000..2b6161f44 --- /dev/null +++ b/itests/it-run-karaf-lib.sh @@ -0,0 +1,748 @@ +# Karaf log triage for archive-it-run.sh (supports log4j2 rollover: karaf.log, karaf.log.1, …). +# Expects archive hooks: target_path, staging_path, log_staged_file, register_staged_file, +# mark_included, ui_spinner_run, ui_detail, line_matches_expected_pattern, PATTERNS_FILE, +# TARGET_DIR, FULL_KARAF, MANIFEST. + +KARAF_PRIMARY_LOG="karaf.log" +KARAF_TAIL_MAX_LINES=20000 +KARAF_MATCH_CONTEXT_BEFORE=15 +KARAF_MATCH_CONTEXT_AFTER=15 +KARAF_STACK_MAX_LINES=120 +KARAF_TRIGGER_RE='ERROR|WARN |Exception|Caused by:|<<< FAILURE|<<< ERROR|FATAL' +KARAF_BLOCK_HEADER_RE='^--- block [0-9]+ \(lines [0-9]+-[0-9]+' +KARAF_CORRELATION_HEADER_RE='^--- correlated with ' +KARAF_RECENT_LOG="karaf-recent.log" +KARAF_SEGMENTS_INDEX="karaf-log-segments.txt" +KARAF_ERRORS_LOG="karaf-errors-warnings.log" +KARAF_UNEXPECTED_LOG="karaf-unexpected-candidates.log" +KARAF_FAILURE_CORRELATION_LOG="karaf-failure-correlation.log" +KARAF_TRIAGE_SUMMARY="karaf-triage-summary.txt" +KARAF_EXCEPTION_INDEX="karaf-exception-index.tsv" +KARAF_FAILED_TESTS_REL="${FAILED_TESTS_REL:-failed-tests.txt}" +KARAF_IT_SOURCES="src/test/java/org/apache/unomi/itests" +KARAF_PATTERNS_REL="expected-karaf-log-patterns.txt" +KARAF_STATIC_DUMMY_PATTERNS=( + dummy_scope + 'event type: dummy' + 'eventType":"dummy' + dummy_workspace +) + +karaf_write_log_checker_patterns() { + if [ ! 
# Return 0 when a log line matches one of the expected patterns listed in
# $PATTERNS_FILE, 1 otherwise.
#
# Arguments: $1 - log line (ANSI sequences are removed via karaf_strip_ansi).
# Globals read: PATTERNS_FILE
# Pattern file format, one entry per line:
#   blank lines and lines starting with "#" are skipped
#   "SUBSTRING: text"   matches when the line contains text
#   "MULTIPART: a|b|c"  matches when a, b and c all occur in that order
#
# Entries with nothing to match (an empty SUBSTRING, or a MULTIPART with no
# non-empty part) are skipped. Previously such an entry matched every line,
# which would mark the whole log as expected. The parts array is local so the
# caller's variables are left untouched.
line_matches_expected_pattern() {
    local line="$1"
    local pattern_line needle rest part matched
    local -a multipart=()

    line="$(karaf_strip_ansi "$line")"
    while IFS= read -r pattern_line || [ -n "$pattern_line" ]; do
        case "$pattern_line" in
            '' | \#*) continue ;;
            SUBSTRING:*)
                needle="${pattern_line#SUBSTRING: }"
                [ -n "$needle" ] || continue
                if [[ "$line" == *"$needle"* ]]; then
                    return 0
                fi
                ;;
            MULTIPART:*)
                multipart=()
                IFS='|' read -ra multipart <<< "${pattern_line#MULTIPART: }"
                rest="$line"
                matched=false
                for part in ${multipart[@]+"${multipart[@]}"}; do
                    [ -n "$part" ] || continue
                    if [[ "$rest" != *"$part"* ]]; then
                        # A part is missing: try the next pattern line.
                        continue 2
                    fi
                    # Search for the next part only after this match.
                    rest="${rest#*"$part"}"
                    matched=true
                done
                if [ "$matched" = true ]; then
                    return 0
                fi
                ;;
        esac
    done < "$PATTERNS_FILE"
    return 1
}
# Print the Karaf log segments of a log directory, oldest first:
# karaf.log.N (highest N first) down to karaf.log.1, then the active karaf.log.
# Arguments: $1 - log directory
list_karaf_log_segments() {
  local log_dir="$1"
  local max_i=0 i f
  local had_nullglob=0

  # Remember the caller's nullglob setting so it is restored, not clobbered.
  if shopt -q nullglob; then
    had_nullglob=1
  fi
  shopt -s nullglob
  for f in "$log_dir"/"$KARAF_PRIMARY_LOG".[0-9]*; do
    [ -f "$f" ] || continue
    i="${f##*.}"
    [[ "$i" =~ ^[0-9]+$ ]] || continue
    if [ "$i" -gt "$max_i" ]; then
      max_i="$i"
    fi
  done
  if [ "$had_nullglob" -eq 0 ]; then
    shopt -u nullglob
  fi

  while [ "$max_i" -gt 0 ]; do
    f="$log_dir/$KARAF_PRIMARY_LOG.$max_i"
    if [ -f "$f" ]; then
      printf '%s\n' "$f"
    fi
    max_i=$((max_i - 1))
  done

  if [ -f "$log_dir/$KARAF_PRIMARY_LOG" ]; then
    printf '%s\n' "$log_dir/$KARAF_PRIMARY_LOG"
  fi
}

# Print the number of log segments found in $1.
count_karaf_log_segments() {
  list_karaf_log_segments "$1" | wc -l | tr -d ' '
}

# Concatenate all segments of $1 (oldest first) into file $2.
merge_karaf_log_segments() {
  local log_dir="$1"
  local out="$2"
  local seg

  : > "$out"
  while IFS= read -r seg; do
    [ -n "$seg" ] || continue
    cat "$seg" >> "$out"
  done < <(list_karaf_log_segments "$log_dir")
}

# Write a TSV-like index (order, file name, size) of the segments of $1 into
# the staged copy of $2 and register it.
write_karaf_segments_index() {
  local log_dir="$1"
  local base_dir="$2"
  local out order=0
  local seg name bytes

  out="$(karaf_staged_path "$base_dir" "$KARAF_SEGMENTS_INDEX")"
  {
    echo "# Karaf log segments in chronological order (oldest first)"
    echo "# Rolled files: karaf.log.N (higher N = older); active file: karaf.log"
    echo "# order\tfile\tbytes"
    while IFS= read -r seg; do
      [ -n "$seg" ] || continue
      order=$((order + 1))
      name="$(basename "$seg")"
      bytes="$(wc -c < "$seg" 2>/dev/null | tr -d ' ')"
      printf '%d\t%s\t%s\n' "$order" "$name" "$bytes"
    done < <(list_karaf_log_segments "$log_dir")
  } > "$out"
  register_staged_file "$base_dir/$KARAF_SEGMENTS_INDEX" \
    "Added Karaf rollover segment index from $log_dir"
}

# Human-readable description of a segment count ($1), without trailing newline.
karaf_segment_summary() {
  local count="$1"
  local rollover_count=$((count - 1))
  local summary="karaf.log"

  if [ "$rollover_count" -gt 0 ]; then
    summary="$summary + $rollover_count rollover segment(s)"
  fi
  printf '%s' "$summary"
}

# Print $1 with ANSI SGR colour sequences removed.
karaf_strip_ansi() {
  sed $'s/\033\\[[0-9;]*m//g' <<< "$1"
}

# Succeed when $1 matches KARAF_TRIGGER_RE or starts with an exception head
# such as "java.lang.FooException:".
karaf_is_trigger_line() {
  local stripped
  stripped="$(karaf_strip_ansi "$1")"
  [[ "$stripped" =~ $KARAF_TRIGGER_RE ]] && return 0
  [[ "$stripped" =~ ^[a-zA-Z0-9_.$]+(\.[a-zA-Z0-9_$]+)*(Exception|Error): ]] && return 0
  return 1
}

# Succeed when $1 is one of the always-ignored log lines.
karaf_is_globally_ignored_line() {
  local stripped
  stripped="$(karaf_strip_ansi "$1")"
  # LogChecker fast path: BundleWatcher startup warnings are expected churn.
  if [[ "$stripped" =~ \|[^|]*WARN[^|]*\|[^|]*BundleWatcher ]]; then
    return 0
  fi
  if [[ "$stripped" == *"Old style feature file without namespace"* ]]; then
    return 0
  fi
  return 1
}

# Succeed when $1 is a trigger line that is neither globally ignored nor
# covered by the expected-patterns file.
karaf_line_is_actionable_trigger() {
  local line="$1"
  karaf_is_trigger_line "$line" || return 1
  karaf_is_globally_ignored_line "$line" && return 1
  line_matches_expected_pattern "$(karaf_strip_ansi "$line")" && return 1
  return 0
}

# Succeed when $1 is a "--- block" or "--- correlated with" header line.
is_karaf_block_header() {
  [[ "$1" =~ $KARAF_BLOCK_HEADER_RE ]] || [[ "$1" =~ $KARAF_CORRELATION_HEADER_RE ]]
}

# Copy merged log $1 to $2, truncated to its last KARAF_TAIL_MAX_LINES lines.
write_karaf_recent_excerpt() {
  local merged="$1"
  local dest="$2"
  local total

  total="$(wc -l < "$merged" | tr -d ' ')"
  if [ "$total" -le "$KARAF_TAIL_MAX_LINES" ]; then
    cp "$merged" "$dest"
    return
  fi
  tail -n "$KARAF_TAIL_MAX_LINES" "$merged" > "$dest"
}

# Extract every trigger line of log $1 with surrounding context and its stack
# trace, merge overlapping ranges, and write the blocks to $2.
extract_karaf_errors_with_context() {
  local log="$1"
  local out="$2"
  awk -v before="$KARAF_MATCH_CONTEXT_BEFORE" \
    -v after="$KARAF_MATCH_CONTEXT_AFTER" \
    -v stack_max="$KARAF_STACK_MAX_LINES" \
    -v trigger_re="$KARAF_TRIGGER_RE" '
    function strip_ansi(s,    t) {
      t = s
      while (match(t, /\033\[[0-9;]*m/)) {
        t = substr(t, 1, RSTART - 1) substr(t, RSTART + RLENGTH)
      }
      gsub(/\r/, "", t)
      return t
    }
    function is_timestamped_log(line) {
      return line ~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}T/ \
        || line ~ /^[A-Z][a-z]{2} [0-9]{2}, [0-9]{4}/
    }
    function is_stack_line(line) {
      return line ~ /^[[:space:]]+at / \
        || line ~ /^Caused by:/ \
        || line ~ /^[[:space:]]+\.\.\. [0-9]+ more/
    }
    function is_exception_head(line) {
      return line ~ /^[a-zA-Z0-9_.$]+(\.[a-zA-Z0-9_$]+)*(Exception|Error):/
    }
    function is_globally_ignored(line,    s) {
      s = strip_ansi(line)
      if (s ~ /\|WARN[^|]*\|[^|]*BundleWatcher/) return 1
      if (index(s, "Old style feature file without namespace") > 0) return 1
      return 0
    }
    function is_trigger(line,    s) {
      if (is_globally_ignored(line)) return 0
      s = strip_ansi(line)
      if (s ~ trigger_re) return 1
      if (is_exception_head(s)) return 1
      return 0
    }
    function extend_end_for_stack(start_line, end_line,    j, extended) {
      extended = 0
      for (j = start_line + 1; j <= total; j++) {
        if (is_stack_line(lines[j]) || is_exception_head(strip_ansi(lines[j]))) {
          if (j > end_line) end_line = j
          extended++
          if (extended >= stack_max) break
          continue
        }
        if (is_timestamped_log(strip_ansi(lines[j]))) {
          break
        }
      }
      return end_line
    }
    {
      lines[NR] = $0
      total = NR
    }
    END {
      tcount = 0
      for (i = 1; i <= total; i++) {
        if (!is_trigger(lines[i])) continue
        tcount++
        triggers[tcount] = i
      }

      rcount = 0
      for (ti = 1; ti <= tcount; ti++) {
        i = triggers[ti]
        start = i - before
        if (start < 1) start = 1
        end = i + after
        if (end > total) end = total
        end = extend_end_for_stack(i, end)
        rcount++
        range_start[rcount] = start
        range_end[rcount] = end
        range_anchor[rcount] = i
      }

      if (rcount == 0) exit

      # Sort ranges by start line (insertion sort — few ranges).
      for (i = 2; i <= rcount; i++) {
        j = i
        while (j > 1 && range_start[j] < range_start[j - 1]) {
          tmp = range_start[j]; range_start[j] = range_start[j - 1]; range_start[j - 1] = tmp
          tmp = range_end[j]; range_end[j] = range_end[j - 1]; range_end[j - 1] = tmp
          tmp = range_anchor[j]; range_anchor[j] = range_anchor[j - 1]; range_anchor[j - 1] = tmp
          j--
        }
      }

      mcount = 0
      for (i = 1; i <= rcount; i++) {
        if (mcount == 0) {
          mcount = 1
          merged_start[1] = range_start[i]
          merged_end[1] = range_end[i]
          continue
        }
        if (range_start[i] <= merged_end[mcount] + 2) {
          if (range_end[i] > merged_end[mcount]) {
            merged_end[mcount] = range_end[i]
          }
        } else {
          mcount++
          merged_start[mcount] = range_start[i]
          merged_end[mcount] = range_end[i]
        }
      }

      for (bi = 1; bi <= mcount; bi++) {
        printf "--- block %d (lines %d-%d) ---\n", bi, merged_start[bi], merged_end[bi]
        for (j = merged_start[bi]; j <= merged_end[bi]; j++) {
          print strip_ansi(lines[j])
        }
        print ""
      }
    }' "$log" > "$out"
}

# Write to $2 a ranked TSV (rank, count, first_line, sample) of the most
# frequent exception heads / ERROR / WARN messages of log $1 (top 80).
write_karaf_exception_index() {
  local log="$1"
  local out="$2"
  awk '
    function strip_ansi(s,    t) {
      t = s
      while (match(t, /\033\[[0-9;]*m/)) {
        t = substr(t, 1, RSTART - 1) substr(t, RSTART + RLENGTH)
      }
      return t
    }
    function sample_key(line,    s) {
      s = strip_ansi(line)
      if (s ~ /^[a-zA-Z0-9_.$]+(\.[a-zA-Z0-9_$]+)*(Exception|Error):/) {
        return s
      }
      if (s ~ /\|ERROR\|/) {
        sub(/^[^|]*\|[^|]*\|[^|]*\|[^|]*\| /, "", s)
        if (length(s) > 160) s = substr(s, 1, 160) "..."
        return "ERROR: " s
      }
      if (s ~ /\|WARN /) {
        sub(/^[^|]*\|[^|]*\|[^|]*\|[^|]*\| /, "", s)
        if (length(s) > 160) s = substr(s, 1, 160) "..."
        return "WARN: " s
      }
      return ""
    }
    {
      key = sample_key($0)
      if (key == "") next
      count[key]++
      if (!(key in first_line)) {
        first_line[key] = NR
      }
    }
    END {
      n = 0
      for (key in count) {
        n++
        keys[n] = key
        vals[n] = count[key]
        lines[n] = first_line[key]
      }
      for (i = 1; i <= n; i++) {
        for (j = i + 1; j <= n; j++) {
          if (vals[j] > vals[i] \
            || (vals[j] == vals[i] && lines[j] < lines[i])) {
            tmp = keys[i]; keys[i] = keys[j]; keys[j] = tmp
            tmp = vals[i]; vals[i] = vals[j]; vals[j] = tmp
            tmp = lines[i]; lines[i] = lines[j]; lines[j] = tmp
          }
        }
      }
      print "rank\tcount\tfirst_line\tsample"
      limit = (n > 80) ? 80 : n
      for (i = 1; i <= limit; i++) {
        gsub(/\t/, " ", keys[i])
        printf "%d\t%d\t%d\t%s\n", i, vals[i], lines[i], keys[i]
      }
    }' "$log" > "$out"
}

# Write to $3 the excerpts of merged log $1 that mention the failing tests
# listed (one "class.method" per line) in $2.
# Returns: 0 when at least one excerpt was written, non-zero otherwise.
write_karaf_failure_correlation() {
  local merged="$1"
  local failed_file="$2"
  local out="$3"

  if [ ! -s "$failed_file" ]; then
    return 1
  fi

  {
    echo "# Karaf excerpts near failing tests (class/method anchors in stack traces or messages)"
    echo "# Source: $KARAF_FAILED_TESTS_REL — pair with failsafe-reports/AllITs.txt stack traces"
    echo
  } > "$out"

  awk -v before="$KARAF_MATCH_CONTEXT_BEFORE" \
    -v after="$KARAF_MATCH_CONTEXT_AFTER" \
    -v stack_max="$KARAF_STACK_MAX_LINES" \
    -v failed_file="$failed_file" \
    -v max_per_test=3 '
    function strip_ansi(s,    t) {
      t = s
      while (match(t, /\033\[[0-9;]*m/)) {
        t = substr(t, 1, RSTART - 1) substr(t, RSTART + RLENGTH)
      }
      gsub(/\r/, "", t)
      return t
    }
    function is_timestamped_log(line) {
      return line ~ /^[0-9]{4}-[0-9]{2}-[0-9]{2}T/ \
        || line ~ /^[A-Z][a-z]{2} [0-9]{2}, [0-9]{4}/
    }
    function is_stack_line(line) {
      return line ~ /^[[:space:]]+at / \
        || line ~ /^Caused by:/ \
        || line ~ /^[[:space:]]+\.\.\. [0-9]+ more/
    }
    function is_exception_head(line) {
      return line ~ /^[a-zA-Z0-9_.$]+(\.[a-zA-Z0-9_$]+)*(Exception|Error):/
    }
    function matches_test(line, class, method) {
      if (index(line, class)) return 1
      if (index(line, class ".java")) return 1
      if (index(line, class "." method)) return 1
      if (index(line, "at ") && index(line, class) && index(line, method)) return 1
      return 0
    }
    function extend_end(anchor, end,    j, extended) {
      extended = 0
      for (j = anchor + 1; j <= total; j++) {
        if (is_stack_line(lines[j]) || is_exception_head(strip_ansi(lines[j]))) {
          if (j > end) end = j
          extended++
          if (extended >= stack_max) break
          continue
        }
        if (is_timestamped_log(strip_ansi(lines[j]))) break
      }
      return end
    }
    function print_block(test, anchor,    start, end, j, key) {
      if (anchor < 1 || anchor > total) return
      key = test SUBSEP anchor
      if (key in printed) return
      printed[key] = 1
      per_test[test]++
      if (per_test[test] > max_per_test) return
      start = anchor - before
      if (start < 1) start = 1
      end = anchor + after
      if (end > total) end = total
      end = extend_end(anchor, end)
      printf "--- correlated with %s (anchor line %d, excerpt %d-%d) ---\n", test, anchor, start, end
      for (j = start; j <= end; j++) {
        print strip_ansi(lines[j])
      }
      print ""
    }
    BEGIN {
      while ((getline test < failed_file) > 0) {
        if (test == "") continue
        tcount++
        tests[tcount] = test
        # split() returns the element count; length(array) is not POSIX awk.
        np = split(test, parts, ".")
        methods[tcount] = parts[np]
        classes[tcount] = test
        sub(/\.[^.]+$/, "", classes[tcount])
      }
      close(failed_file)
    }
    {
      lines[NR] = $0
      total = NR
    }
    END {
      for (ti = 1; ti <= tcount; ti++) {
        test = tests[ti]
        class = classes[ti]
        method = methods[ti]
        per_test[test] = 0
        for (i = 1; i <= total; i++) {
          line = strip_ansi(lines[i])
          if (matches_test(line, class, method)) {
            print_block(test, i)
          }
        }
      }
    }' "$merged" >> "$out"

  grep -q '^--- correlated with ' "$out"
}

# Write the triage summary (entry point for log analysis) to $6.
# Arguments: $1 merged log, $2 errors log, $3 unexpected log,
#            $4 correlation log, $5 exception index TSV, $6 output file
write_karaf_triage_summary() {
  local merged="$1"
  local errors_log="$2"
  local unexpected_log="$3"
  local correlation_log="$4"
  local index_tsv="$5"
  local out="$6"
  local total merged_bytes error_blocks unexpected_blocks correlation_blocks failed_count
  local n=1

  total="$(wc -l < "$merged" | tr -d ' ')"
  merged_bytes="$(wc -c < "$merged" | tr -d ' ')"
  # grep -c already prints 0 (and exits 1) when nothing matches; appending
  # "|| echo 0" inside the substitution would yield "0<newline>0".
  error_blocks="$(grep -c '^--- block ' "$errors_log" 2>/dev/null)" || error_blocks=0
  unexpected_blocks="$(grep -cE '^--- (block|correlated with) ' "$unexpected_log" 2>/dev/null)" \
    || unexpected_blocks=0
  correlation_blocks=0
  if [ -f "$correlation_log" ]; then
    correlation_blocks="$(grep -c '^--- correlated with ' "$correlation_log" 2>/dev/null)" \
      || correlation_blocks=0
  fi
  failed_count=0
  if [ -f "$(staging_path "$KARAF_FAILED_TESTS_REL")" ]; then
    failed_count="$(wc -l < "$(staging_path "$KARAF_FAILED_TESTS_REL")" | tr -d ' ')"
  fi

  {
    echo "# Karaf triage summary (read this before deep log diving)"
    echo "# Generated: $(it_utc_now)"
    echo
    echo "## Log volume"
    echo "merged.lines=$total"
    echo "merged.bytes=$merged_bytes"
    echo "context.policy=${KARAF_MATCH_CONTEXT_BEFORE}+${KARAF_MATCH_CONTEXT_AFTER} lines, stack extension up to ${KARAF_STACK_MAX_LINES} lines"
    echo "recent.excerpt.lines=$(wc -l < "$(dirname "$out")/$KARAF_RECENT_LOG" 2>/dev/null | tr -d ' ')"
    echo
    echo "## Mined artifacts (priority order for LLM)"
    echo "$n. $KARAF_TRIAGE_SUMMARY (this file)"
    n=$((n + 1))
    if [ "$failed_count" -gt 0 ]; then
      echo "$n. $KARAF_FAILURE_CORRELATION_LOG — excerpts near $failed_count failing test(s)"
      n=$((n + 1))
    fi
    echo "$n. $KARAF_UNEXPECTED_LOG — errors/warnings not matching expected hardening patterns ($unexpected_blocks blocks)"
    n=$((n + 1))
    echo "$n. $KARAF_EXCEPTION_INDEX — top recurring exception/error messages"
    n=$((n + 1))
    echo "$n. $KARAF_ERRORS_LOG — merged error/warn blocks with full stack traces ($error_blocks blocks)"
    n=$((n + 1))
    echo "$n. $KARAF_RECENT_LOG — recent/full log tail for timing context"
    echo
    echo "## Global ignores (same as LogChecker fast paths)"
    echo "- WARN from BundleWatcher (bundle not active during startup)"
    echo "- Karaf JaxbUtil deprecated features.xml namespace warning"
    echo
    echo "## Top recurring signals"
    if [ -f "$index_tsv" ]; then
      tail -n +2 "$index_tsv" | head -15 | while IFS=$'\t' read -r rank count first_line sample; do
        echo "- [$count x, first@$first_line] $sample"
      done
    else
      echo "(none)"
    fi
    echo
    echo "## Workflow"
    echo "1. Confirm failures in failsafe-reports/ and failed-tests.txt ($failed_count listed)"
    echo "2. Read failure correlation excerpts for server-side context"
    echo "3. Use unexpected candidates for novel errors; cross-check expected-karaf-log-patterns.txt"
  } > "$out"
}

# Copy from block file $1 to $2 only the blocks that contain at least one
# actionable (unexpected) trigger line.
filter_unexpected_karaf_blocks() {
  local filtered_log="$1"
  local out="$2"
  : > "$out"

  local block=""
  local block_has_unexpected=0
  local line

  # Emit the accumulated block when it was flagged, then reset the state.
  flush_block() {
    if [ -n "$block" ] && [ "$block_has_unexpected" -eq 1 ]; then
      printf '%s\n' "$block" >> "$out"
    fi
    block=""
    block_has_unexpected=0
  }

  while IFS= read -r line || [ -n "$line" ]; do
    if is_karaf_block_header "$line"; then
      flush_block
      block="$line"
      continue
    fi
    if [ -z "$block" ]; then
      continue
    fi
    if [ -z "$line" ] && [ -n "$block" ]; then
      block="$block"$'\n'
      continue
    fi
    block="$block"$'\n'"$line"
    if karaf_line_is_actionable_trigger "$line"; then
      block_has_unexpected=1
    fi
  done < "$filtered_log"
  flush_block
}

# Run a command under a spinner, then register the staged file it produced.
# Arguments: $1 spinner message, $2 staged relative path, $3 success message,
#            remaining arguments: the command to run
run_karaf_triage_step() {
  local spinner_msg="$1"
  local rel_path="$2"
  local success_msg="$3"
  shift 3

  ui_spinner_run "$spinner_msg" "$@"
  register_staged_file "$rel_path" "$success_msg"
}

# Stage every triage artifact (excerpt, error blocks, index, unexpected
# candidates, failure correlation, summary) for one Karaf log directory ($1).
process_karaf_log_dir() {
  local log_dir="$1"
  local rel base_dir dest_dir merged_tmp
  local context_label segment_count segment_summary recent_lines
  local errors_path unexpected_path index_path correlation_path summary_path failed_path
  local seg name

  rel="${log_dir#"$TARGET_DIR"/}"
  base_dir="$rel"
  dest_dir="$(staging_path "$base_dir")"
  mkdir -p "$dest_dir"

  segment_count="$(count_karaf_log_segments "$log_dir")"
  [ "$segment_count" -gt 0 ] || return

  segment_summary="$(karaf_segment_summary "$segment_count")"
  context_label="${KARAF_MATCH_CONTEXT_BEFORE}+${KARAF_MATCH_CONTEXT_AFTER} (+stack≤${KARAF_STACK_MAX_LINES})"
  errors_path="$(karaf_staged_path "$base_dir" "$KARAF_ERRORS_LOG")"
  unexpected_path="$(karaf_staged_path "$base_dir" "$KARAF_UNEXPECTED_LOG")"
  index_path="$(karaf_staged_path "$base_dir" "$KARAF_EXCEPTION_INDEX")"
  correlation_path="$(karaf_staged_path "$base_dir" "$KARAF_FAILURE_CORRELATION_LOG")"
  summary_path="$(karaf_staged_path "$base_dir" "$KARAF_TRIAGE_SUMMARY")"
  failed_path="$(staging_path "$KARAF_FAILED_TESTS_REL")"

  if [ "$segment_count" -gt 1 ]; then
    write_karaf_segments_index "$log_dir" "$base_dir"
  fi

  if [ "$FULL_KARAF" = true ]; then
    while IFS= read -r seg; do
      [ -n "$seg" ] || continue
      name="$(basename "$seg")"
      cp "$seg" "$dest_dir/$name"
      log_staged_file "$base_dir/$name" "Added full $seg"
    done < <(list_karaf_log_segments "$log_dir")
  fi

  merged_tmp="$(mktemp "${TMPDIR:-/tmp}/karaf-merged.XXXXXX")"
  merge_karaf_log_segments "$log_dir" "$merged_tmp"

  write_karaf_recent_excerpt "$merged_tmp" "$dest_dir/$KARAF_RECENT_LOG"
  recent_lines="$(wc -l < "$dest_dir/$KARAF_RECENT_LOG" | tr -d ' ')"
  register_staged_file "$base_dir/$KARAF_RECENT_LOG" \
    "Added karaf excerpt ($recent_lines lines, max $KARAF_TAIL_MAX_LINES) from $segment_summary under $log_dir"

  if ! run_karaf_triage_step \
    "Mining Karaf errors (merged blocks, full stacks)..." \
    "$base_dir/$KARAF_ERRORS_LOG" \
    "Added merged error/warn blocks ($context_label) from $segment_summary under $log_dir" \
    extract_karaf_errors_with_context "$merged_tmp" "$errors_path"; then
    rm -f "$merged_tmp"
    return
  fi

  ui_spinner_run "Building exception index..." \
    write_karaf_exception_index "$merged_tmp" "$index_path"
  register_staged_file "$base_dir/$KARAF_EXCEPTION_INDEX" \
    "Added recurring exception/error index from $segment_summary under $log_dir"

  if ! run_karaf_triage_step \
    "Filtering expected hardening noise..." \
    "$base_dir/$KARAF_UNEXPECTED_LOG" \
    "Added unexpected-only karaf candidates from $segment_summary under $log_dir" \
    filter_unexpected_karaf_blocks "$errors_path" "$unexpected_path"; then
    ui_detail "All filtered karaf errors/warnings match expected hardening patterns"
  fi

  if write_karaf_failure_correlation "$merged_tmp" "$failed_path" "$correlation_path"; then
    register_staged_file "$base_dir/$KARAF_FAILURE_CORRELATION_LOG" \
      "Added Karaf excerpts correlated with failing tests"
  else
    ui_detail "Skipped $KARAF_FAILURE_CORRELATION_LOG (no failed-tests.txt or no anchors)"
  fi

  write_karaf_triage_summary "$merged_tmp" "$errors_path" "$unexpected_path" \
    "$correlation_path" "$index_path" "$summary_path"
  register_staged_file "$base_dir/$KARAF_TRIAGE_SUMMARY" \
    "Added Karaf triage summary (LLM entry point for log analysis)"

  rm -f "$merged_tmp"
  mark_included
}

# Process every */data/log directory under $TARGET_DIR/exam that holds Karaf
# log segments; note it in the manifest when none is found.
archive_karaf_logs() {
  local log_dir found=0

  while IFS= read -r log_dir; do
    [ -n "$log_dir" ] || continue
    if ! karaf_log_dir_has_segments "$log_dir"; then
      continue
    fi
    found=1
    process_karaf_log_dir "$log_dir"
  done < <(
    find "$TARGET_DIR/exam" -type f \( -name "$KARAF_PRIMARY_LOG" -o -name "$KARAF_PRIMARY_LOG".'[0-9]*' \) \
      -path '*/data/log/*' -print 2>/dev/null \
      | sed 's|/[^/]*$||' \
      | sort -u
  )

  if [ "$found" -eq 0 ]; then
    echo " (no target/exam/*/data/log/karaf.log or karaf.log.N found)" >> "$MANIFEST"
    ui_warn "No Karaf log segments under $TARGET_DIR/exam"
  fi
}

# diff --git a/itests/it-run-lib.sh b/itests/it-run-lib.sh
# new file mode 100644
# index 000000000..2ee4d7ba9
# --- /dev/null
# +++ b/itests/it-run-lib.sh
# @@ -0,0 +1,442 @@
# Shared helpers for IT run scripts (sourced, not executed).

IT_SCRIPT_DIR=""
IT_ARCHIVES_DIR=""

IT_ARCHIVES_DIR_NAME="archives"
IT_RUN_DIR_GLOB="it-run-*"
IT_TEST_RESULTS="test-results.tsv"
IT_RUN_SUMMARY="run-summary.properties"
IT_RUNS_INDEX="runs-index.tsv"
IT_FAILSAFE_XML="TEST-org.apache.unomi.itests.AllITs.xml"
IT_LATEST_COMPARISON="latest-comparison.txt"
IT_CAPTURE_COMPARISON="comparison-last-3.txt"
IT_AUTO_COMPARE_LAST=3
IT_RUN_FINGERPRINT_FIELD="run.fingerprint"
IT_TRACE_FINGERPRINT_FIELDS='^(search\.engine|search\.heap|karaf\.heap|single\.test|use\.opensearch|maven\.exit\.code)='
IT_RUN_SUMMARY_EXCERPT_FIELDS='^(run\.captured|search\.engine|it\.karaf\.heap|elasticsearch\.heap|opensearch\.heap|tests\.failures|tests\.errors|operator\.note)='
IT_ENGINE_PORT_FILES=(
  elasticsearch-port.properties
  opensearch-port.properties
)
IT_RUN_CONTEXT_NO_OPERATOR_NOTE='(none — pass --message "..." to describe run conditions, e.g. heavy swap, CI runner, single-test rerun)'

# Record the script directory ($1) and derive the archives directory from it.
it_run_lib_init() {
  IT_SCRIPT_DIR="$1"
  IT_ARCHIVES_DIR="$IT_SCRIPT_DIR/$IT_ARCHIVES_DIR_NAME"
}

# cd into the script directory ($1) and initialise the library paths.
it_run_tools_init() {
  local script_dir="$1"
  cd "$script_dir"
  it_run_lib_init "$script_dir"
}

# Print the current UTC time as an ISO-8601 timestamp.
it_utc_now() {
  date -u +%Y-%m-%dT%H:%M:%SZ
}

# Print the host name, or "unknown" when it cannot be determined.
it_hostname() {
  hostname 2>/dev/null || echo unknown
}

# Succeed when $1 is a directory containing at least one entry.
it_dir_has_files() {
  [ -d "$1" ] && [ -n "$(ls -A "$1" 2>/dev/null)" ]
}

# Print the value="..." of the first line of file $1 carrying name="$2".
it_extract_xml_property() {
  local file="$1"
  local property="$2"

  grep -m1 "name=\"$property\"" "$file" 2>/dev/null \
    | sed -n 's/.*value="\([^"]*\)".*/\1/p'
}

# Print the text of element <$3> inside Maven profile $2 of pom file $1.
it_read_pom_profile_property() {
  local pom="$1"
  local profile="$2"
  local property="$3"

  awk -v profile="$profile" -v prop="$property" '
    $0 ~ "<id>" profile "</id>" { in_profile=1 }
    in_profile && $0 ~ "<" prop ">" {
      # Drop the closing tag first: a greedy ".*<[^>]+>" would also swallow
      # "</prop>" and leave an empty value for one-line elements.
      sub(/<\/.*/, "")
      sub(/.*>/, "")
      print
      exit
    }
    in_profile && $0 ~ "</profile>" { exit }
  ' "$pom"
}

# Guess the search engine used by a run from the contents of target dir $1.
it_infer_search_engine() {
  local target_dir="$1"

  if it_dir_has_files "$target_dir/opensearch0/logs"; then
    echo "opensearch"
  elif it_dir_has_files "$target_dir/elasticsearch0/logs"; then
    echo "elasticsearch"
  elif [ -f "$target_dir/opensearch-port.properties" ]; then
    echo "opensearch"
  elif [ -f "$target_dir/elasticsearch-port.properties" ]; then
    echo "elasticsearch"
  else
    echo "unknown"
  fi
}

# Print the run label (directory base name) of run dir $1.
it_run_label() {
  basename "$1"
}

# Print the value of key $2 in properties file $1 (empty when absent).
it_read_properties_field() {
  local file="$1"
  local field="$2"

  if [ ! -f "$file" ]; then
    echo ""
    return
  fi
  grep -m1 "^${field}=" "$file" 2>/dev/null | cut -d= -f2-
}

# Print the value of key $2 in the run summary of run dir $1.
it_read_run_summary_field() {
  it_read_properties_field "$1/$IT_RUN_SUMMARY" "$2"
}

# Exit with an error unless $1 is a run directory holding test results.
it_validate_run_dir() {
  local dir="$1"

  if [ ! -d "$dir" ]; then
    ui_error "Not a directory: $dir"
    exit 1
  fi
  if [ ! -f "$dir/$IT_TEST_RESULTS" ]; then
    ui_error "Missing $IT_TEST_RESULTS in $dir (run archive-it-run.sh first)"
    exit 1
  fi
}

# List archived run directories in reverse name order.
it_find_archive_run_dirs() {
  find "$IT_ARCHIVES_DIR" -maxdepth 1 -type d -name "$IT_RUN_DIR_GLOB" 2>/dev/null | sort -r
}

# List up to $1 run directories that contain test results (0 = no limit),
# in the order returned by it_find_archive_run_dirs. Prints nothing when
# there is none.
it_collect_comparable_run_dirs() {
  local max_count="$1"
  local -a dirs=()
  local dir

  while IFS= read -r dir; do
    [ -n "$dir" ] || continue
    [ -f "$dir/$IT_TEST_RESULTS" ] || continue
    dirs+=("$dir")
    if [ "$max_count" -gt 0 ] && [ "${#dirs[@]}" -ge "$max_count" ]; then
      break
    fi
  done < <(it_find_archive_run_dirs)

  # printf with no operands still prints one empty line, which made
  # it_count_comparable_runs report 1 for an empty archive.
  if [ "${#dirs[@]}" -gt 0 ]; then
    printf '%s\n' "${dirs[@]}"
  fi
}

# Newest N captures that have test-results.tsv, returned oldest → newest (for compare).
it_find_comparable_run_dirs() {
  local count="$1"
  local -a dirs=()
  local i dir

  while IFS= read -r dir; do
    [ -n "$dir" ] && dirs+=("$dir")
  done < <(it_collect_comparable_run_dirs "$count")

  for ((i = ${#dirs[@]} - 1; i >= 0; i--)); do
    printf '%s\n' "${dirs[i]}"
  done
}

# Print how many archived runs contain test results.
it_count_comparable_runs() {
  it_collect_comparable_run_dirs 0 | wc -l | tr -d ' '
}

# Print key $2 of properties file $1, defaulting to 0 when empty or absent.
it_read_comparison_total() {
  local value
  value="$(it_read_properties_field "$1" "$2")"
  if [ -z "$value" ]; then
    echo "0"
  else
    echo "$value"
  fi
}

# Print the run-summary lines of run dir $1 selected by
# IT_RUN_SUMMARY_EXCERPT_FIELDS.
it_append_run_summary_excerpt() {
  local run_dir="$1"

  if [ ! -f "$run_dir/$IT_RUN_SUMMARY" ]; then
    return
  fi
  grep -E "$IT_RUN_SUMMARY_EXCERPT_FIELDS" "$run_dir/$IT_RUN_SUMMARY" 2>/dev/null || true
}

# Print the shared awk rules that fill status[] and elapsed[], keyed by
# "classname<TAB>name", from failsafe <testcase> elements.
it_failsafe_testcase_awk() {
  cat <<'AWK'
function attr(line, key,    pos, rest) {
  # Anchor on the preceding blank so that "name" does not match inside
  # "classname" when classname happens to come first.
  gsub(/\t/, " ", line)
  pos = index(line, " " key "=\"")
  if (pos == 0) return ""
  rest = substr(line, pos + length(key) + 3)
  sub(/".*/, "", rest)
  return rest
}
/<testcase / {
  current = attr($0, "classname") "\t" attr($0, "name")
  if (current == "\t") current = ""
  if (current != "") {
    status[current] = "PASS"
    elapsed[current] = attr($0, "time")
  }
}
current != "" && /<failure/ { status[current] = "FAIL" }
current != "" && /<error/ { status[current] = "ERROR" }
current != "" && /<skipped/ { status[current] = "SKIP" }
AWK
}

# Convert failsafe XML $1 into TSV $2 (header line first, rows sorted by class
# and method); when $3 is given, append "class.method" of every FAIL/ERROR
# row to it.
it_write_test_results_from_xml() {
  local xml="$1"
  local tsv="$2"
  local failed="${3:-}"

  {
    # The header is written outside the sort so it always stays on line 1.
    printf 'test_class\ttest_method\tstatus\telapsed_s\n'
    awk "$(it_failsafe_testcase_awk)"'
      END {
        for (t in status) {
          split(t, parts, "\t")
          print parts[1] "\t" parts[2] "\t" status[t] "\t" elapsed[t]
        }
      }' "$xml" | sort -t $'\t' -k1,1 -k2,2
  } > "$tsv"

  if [ -n "$failed" ]; then
    awk -F '\t' '$3 == "FAIL" || $3 == "ERROR" { print $1 "." $2 }' "$tsv" >> "$failed"
  fi
}

# Print a hash of the (class, method, status) triples of failsafe XML $1.
it_test_outcomes_fingerprint_from_xml() {
  local xml="$1"

  awk "$(it_failsafe_testcase_awk)"'
    END {
      for (t in status) {
        split(t, parts, "\t")
        print parts[1] "\t" parts[2] "\t" status[t]
      }
    }' "$xml" | sort -t $'\t' -k1,1 -k2,2 | it_hash_lines
}

# Print a one-line "label (failures=N errors=M)" description of run dir $1.
it_summarize_run_dir() {
  local run_dir="$1"
  local label failures errors

  label="$(it_run_label "$run_dir")"
  failures="$(it_read_run_summary_field "$run_dir" tests.failures)"
  errors="$(it_read_run_summary_field "$run_dir" tests.errors)"
  failures="${failures:-?}"
  errors="${errors:-?}"
  printf '%s (failures=%s errors=%s)' "$label" "$failures" "$errors"
}

# Print the SHA-256 hex digest of stdin. Uses shasum when available and falls
# back to sha256sum (same digest); prints nothing when neither tool exists.
it_hash_lines() {
  if command -v shasum >/dev/null 2>&1; then
    shasum -a 256 2>/dev/null | awk '{print $1}'
  elif command -v sha256sum >/dev/null 2>&1; then
    sha256sum 2>/dev/null | awk '{print $1}'
  else
    cat >/dev/null
  fi
}

# Print the HEAD commit of repository $1, or "unknown".
it_git_head() {
  local repo_root="$1"

  if command -v git >/dev/null 2>&1 \
    && git -C "$repo_root" rev-parse HEAD >/dev/null 2>&1; then
    git -C "$repo_root" rev-parse HEAD
  else
    echo "unknown"
  fi
}

# Print the current branch name of repository $1, or "unknown".
it_git_branch() {
  local repo_root="$1"

  if command -v git >/dev/null 2>&1 \
    && git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    git -C "$repo_root" rev-parse --abbrev-ref HEAD 2>/dev/null || echo unknown
  else
    echo "unknown"
  fi
}

# Succeed when git is installed and $1 is inside a work tree.
it_git_in_repo() {
  local repo_root="$1"
  command -v git >/dev/null 2>&1 \
    && git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1
}

# Print the failsafe XML path under target dir $1; fail when it is missing.
it_target_failsafe_xml() {
  local target_dir="$1"
  local path="$target_dir/failsafe-reports/$IT_FAILSAFE_XML"

  if [ -f "$path" ]; then
    echo "$path"
    return 0
  fi
  return 1
}

# Print the lines of file $1 matching extended regex $2 (nothing when the
# file is missing); always succeeds.
it_grep_property_lines() {
  local file="$1"
  local pattern="$2"

  if [ ! -f "$file" ]; then
    return 0
  fi
  grep -E "$pattern" "$file" 2>/dev/null || true
}

# Print the digits of the first <$2> element of failsafe summary XML $1.
it_failsafe_summary_count() {
  local summary_xml="$1"
  local tag="$2"

  if [ ! -f "$summary_xml" ]; then
    echo ""
    return
  fi
  grep -m1 "<${tag}>" "$summary_xml" 2>/dev/null | sed 's/[^0-9]//g'
}

# Print $4 when it is non-empty, otherwise compute the run fingerprint from
# failsafe XML $1, repository $2 and trace file $3.
it_ensure_run_fingerprint() {
  local xml="$1"
  local repo_root="$2"
  local trace_file="$3"
  local current_fp="$4"

  if [ -n "$current_fp" ]; then
    echo "$current_fp"
    return
  fi
  it_compute_run_fingerprint_from_target "$xml" "$repo_root" "$trace_file"
}

# Print a hash of the (class, method, status) triples of results TSV $1.
it_test_outcomes_fingerprint_from_tsv() {
  local tsv="$1"

  # Skip the header by content rather than by position: files written before
  # the header fix carry it at an arbitrary (sorted) position.
  awk -F '\t' '$1 != "test_class" { print $1 "\t" $2 "\t" $3 }' "$tsv" \
    | sort -t $'\t' -k1,1 -k2,2 \
    | it_hash_lines
}

# Print a hash of the fingerprint-relevant fields of trace file $1, or "none".
it_trace_fingerprint_from_file() {
  local trace_file="$1"

  if [ ! -f "$trace_file" ]; then
    echo "none"
    return
  fi
  grep -E "$IT_TRACE_FINGERPRINT_FIELDS" "$trace_file" 2>/dev/null | sort | it_hash_lines
}

# Same as it_trace_fingerprint_from_file, reading the summary of run dir $1.
it_trace_fingerprint_from_summary() {
  it_trace_fingerprint_from_file "$1/$IT_RUN_SUMMARY"
}

# Combine git commit $1, outcomes hash $2 and trace hash $3 into one hash.
it_build_run_fingerprint() {
  local git_commit="$1"
  local outcomes_fp="$2"
  local trace_fp="$3"

  printf '%s\n%s\n%s\n' "$git_commit" "$outcomes_fp" "$trace_fp" | it_hash_lines
}

# Compute the fingerprint of a fresh run from failsafe XML $1, repository $2
# and trace file $3.
it_compute_run_fingerprint_from_target() {
  local xml="$1"
  local repo_root="$2"
  local trace_file="$3"

  it_build_run_fingerprint \
    "$(it_git_head "$repo_root")" \
    "$(it_test_outcomes_fingerprint_from_xml "$xml")" \
    "$(it_trace_fingerprint_from_file "$trace_file")"
}

# Print the fingerprint of archived run dir $1: the stored one when present,
# otherwise recomputed from its TSV and summary. Fails without test results.
it_compute_run_fingerprint_from_archive() {
  local run_dir="$1"
  local stored git_commit

  stored="$(it_read_run_summary_field "$run_dir" "$IT_RUN_FINGERPRINT_FIELD")"
  if [ -n "$stored" ]; then
    echo "$stored"
    return 0
  fi

  if [ ! -f "$run_dir/$IT_TEST_RESULTS" ]; then
    return 1
  fi

  git_commit="$(it_read_run_summary_field "$run_dir" git.commit)"
  if [ -z "$git_commit" ]; then
    git_commit="unknown"
  fi

  it_build_run_fingerprint \
    "$git_commit" \
    "$(it_test_outcomes_fingerprint_from_tsv "$run_dir/$IT_TEST_RESULTS")" \
    "$(it_trace_fingerprint_from_summary "$run_dir")"
}

# Print the first archived run whose fingerprint equals $1; fail when none.
it_find_duplicate_archive() {
  local fingerprint="$1"
  local run_dir existing_fp

  [ -n "$fingerprint" ] || return 1

  while IFS= read -r run_dir; do
    [ -n "$run_dir" ] || continue
    existing_fp="$(it_compute_run_fingerprint_from_archive "$run_dir")" || continue
    if [ "$existing_fp" = "$fingerprint" ]; then
      echo "$run_dir"
      return 0
    fi
  done < <(it_find_archive_run_dirs)

  return 1
}

# Print $1 with tabs and newlines replaced by spaces (safe as a TSV cell).
it_sanitize_index_field() {
  local value="$1"
  value="${value//$'\t'/ }"
  value="${value//$'\n'/ }"
  printf '%s' "$value"
}

# Rebuild the runs index TSV from every archived run that has a summary.
it_update_runs_index() {
  local index run_dir captured git_commit engine failures errors failed_count note
  local karaf_heap search_heap run_id

  mkdir -p "$IT_ARCHIVES_DIR"
  index="$IT_ARCHIVES_DIR/$IT_RUNS_INDEX"
  {
    printf 'run_id\tcaptured_utc\tgit_commit\tsearch_engine\tkaraf_heap\tsearch_heap\ttests_failures\ttests_errors\tfailed_tests_count\toperator_note\trun_path\n'
    for run_dir in "$IT_ARCHIVES_DIR"/$IT_RUN_DIR_GLOB; do
      [ -d "$run_dir" ] || continue
      [ -f "$run_dir/$IT_RUN_SUMMARY" ] || continue
      run_id="$(it_run_label "$run_dir")"
      captured="$(it_read_run_summary_field "$run_dir" run.captured)"
      git_commit="$(it_read_run_summary_field "$run_dir" git.commit)"
      engine="$(it_read_run_summary_field "$run_dir" search.engine)"
      karaf_heap="$(it_read_run_summary_field "$run_dir" it.karaf.heap)"
      search_heap="$(it_read_run_summary_field "$run_dir" elasticsearch.heap)"
      if [ -z "$search_heap" ]; then
        search_heap="$(it_read_run_summary_field "$run_dir" opensearch.heap)"
      fi
      failures="$(it_read_run_summary_field "$run_dir" tests.failures)"
      errors="$(it_read_run_summary_field "$run_dir" tests.errors)"
      failed_count="$(it_read_run_summary_field "$run_dir" failed.tests.count)"
      note="$(it_sanitize_index_field "$(it_read_run_summary_field "$run_dir" operator.note)")"
      # printf '%s' keeps backslashes in the data literal; echo -e would expand
      # sequences such as \n or \t found in a note or path and break the row.
      printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
        "$run_id" "$captured" "$git_commit" "$engine" "$karaf_heap" "$search_heap" \
        "$failures" "$errors" "$failed_count" "$note" "$run_dir"
    done
  } > "$index"
  ui_detail "Updated $index"
}

# diff --git a/itests/it-run-ui.sh b/itests/it-run-ui.sh
# new file mode 100644
# index 000000000..34a544297
# --- /dev/null
# +++ b/itests/it-run-ui.sh
# @@ -0,0 +1,255 @@
# Presentation layer for IT run tooling (sourced, not executed directly).

UI_USE_COLOR=0
UI_SPINNER_PID=""

# Enable colour only when stdout is a terminal and neither NO_COLOR nor CI is set.
ui_init() {
  UI_USE_COLOR=0
  if [ -t 1 ] && [ -z "${NO_COLOR:-}" ] && [ -z "${CI:-}" ]; then
    UI_USE_COLOR=1
  fi
}

# Succeed when coloured output is enabled.
ui__color_enabled() {
  [ "$UI_USE_COLOR" -eq 1 ]
}

# Print the SGR escape for code $1 when colour is enabled; always succeeds so
# that callers running under "set -e" are not aborted when colour is off.
ui__fg() {
  local code="$1"
  if ui__color_enabled; then
    printf '\033[%sm' "$code"
  fi
}

ui__reset() { ui__fg 0; }
ui__bold() { ui__fg 1; }
ui__dim() { ui__fg 2; }
ui__red() { ui__fg 31; }
ui__grn() { ui__fg 32; }
ui__ylw() { ui__fg 33; }
ui__blu() { ui__fg 34; }
ui__mag() { ui__fg 35; }
ui__cyn() { ui__fg 36; }

# Print one status line.
# Arguments: $1 glyph, $2 colour function, $3 indent, $4 message,
#            $5 file descriptor (default 1), $6 plain-text label used on
#            descriptor 2 when colour is off
ui__line() {
  local glyph="$1"
  local color_fn="$2"
  local indent="$3"
  local message="$4"
  local stream="${5:-1}"
  local plain_label="${6:-}"

  if ui__color_enabled; then
    # Colour codes, glyph and message all go to the same descriptor, so a
    # warning sent to stderr does not leave its glyph behind on stdout.
    {
      "$color_fn"
      printf '%s%s ' "$indent" "$glyph"
      ui__reset
      printf '%s\n' "$message"
    } >&"$stream"
  elif [ "$stream" -eq 2 ]; then
    printf '%s: %s\n' "$plain_label" "$message" >&2
  else
    printf '%s%s\n' "$indent" "$message"
  fi
}

# Print a ✔/✖ status line for exit code $1 and message $2 on descriptor $3
# (default 2). Without colour only failures are reported, as "ERROR: ...".
ui__glyph_status() {
  local ok="$1"
  local message="$2"
  local stream="${3:-2}"

  if ui__color_enabled; then
    {
      if [ "$ok" -eq 0 ]; then
        ui__grn; printf ' ✔ '; ui__reset
      else
        ui__red; printf ' ✖ '; ui__reset
        message="$message (failed)"
      fi
      echo "$message"
    } >&"$stream"
  elif [ "$ok" -ne 0 ]; then
    echo "ERROR: $message" >&"$stream"
  fi
}
# ui__banner_box TITLE
# Colour mode: prints the boxed UNOMI logo with TITLE on the last row.
# Plain mode:  prints TITLE only.
# Every row is padded to the 59-column interior of the border.
ui__banner_box() {
    local title="$1"

    if ui__color_enabled; then
        ui__cyn; ui__bold
        cat <<'EOF'
 ╔═══════════════════════════════════════════════════════════╗
 ║                                                           ║
 ║        ██╗   ██╗███╗   ██╗ ██████╗ ███╗   ███╗██╗         ║
 ║        ██║   ██║████╗  ██║██╔═══██╗████╗ ████║██║         ║
 ║        ██║   ██║██╔██╗ ██║██║   ██║██╔████╔██║██║         ║
 ║        ██║   ██║██║╚██╗██║██║   ██║██║╚██╔╝██║██║         ║
 ║        ╚██████╔╝██║ ╚████║╚██████╔╝██║ ╚═╝ ██║██║         ║
 ║         ╚═════╝ ╚═╝  ╚═══╝ ╚═════╝ ╚═╝     ╚═╝╚═╝         ║
 ║                                                           ║
EOF
        # 2 spaces + 57-character field = the 59-column interior of the box.
        printf ' ║  %-57s║\n' "$title"
        echo ' ╚═══════════════════════════════════════════════════════════╝'
        ui__reset
    else
        echo "$title"
    fi
}

# ui__banner_with_tagline TITLE TAGLINE
# Prints the banner for TITLE and, when TAGLINE is non-empty, the tagline
# below it (dimmed and typed out character by character in colour mode).
ui__banner_with_tagline() {
    local title="$1"
    local tagline="$2"

    ui__banner_box "$title"
    if [ -z "$tagline" ]; then
        return
    fi
    if ui__color_enabled; then
        # Keep the dim attribute active while the tagline is typed; resetting
        # before ui_typewriter would dim only the indent.
        ui__dim; printf ' '
        ui_typewriter "$tagline"
        ui__reset
        echo
    else
        echo "$tagline"
    fi
}

# ui_banner
# Banner of the run archiver.
ui_banner() {
    ui__banner_with_tagline "IT RUN ARCHIVER · pack · triage · ship" \
        'Collecting artifacts for humans and LLMs alike...'
}

# ui_banner_compare
# Banner of the run comparator.
ui_banner_compare() {
    ui__banner_with_tagline "IT RUN COMPARATOR · systematic · flaky · drift" \
        'Diffing captures to separate real bugs from noise...'
}

# ui_typewriter TEXT [DELAY]
#   TEXT   string to print
#   DELAY  pause after each character, passed to sleep (default 0.012)
# Colour mode prints TEXT one character at a time; plain mode prints it at
# once. Both end with a newline.
ui_typewriter() {
    local text="$1"
    local delay="${2:-0.012}"
    local i

    if ! ui__color_enabled; then
        printf '%s\n' "$text"
        return
    fi
    for ((i = 0; i < ${#text}; i++)); do
        printf '%s' "${text:i:1}"
        sleep "$delay"
    done
    echo
}

# ui_phase STEP LABEL
# Prints a phase heading: "▸ [STEP] LABEL" in bold blue, or "[STEP] LABEL" in
# plain mode.
ui_phase() {
    local step="$1"
    local label="$2"
    if ui__color_enabled; then
        ui__blu; ui__bold
        printf '▸ [%s] %s\n' "$step" "$label"
        ui__reset
    else
        printf '[%s] %s\n' "$step" "$label"
    fi
}

# One-line messages built on ui__line: detail goes to stdout, warn and error
# go to stderr with a WARN / ERROR label in plain mode.
ui_detail() { ui__line '→' ui__dim ' ' "$1"; }
ui_warn() { ui__line '⚠' ui__ylw ' ' "$1" 2 WARN; }
ui_error() { ui__line '✖' ui__red ' ' "$1" 2 ERROR; }

# ui_metric LABEL VALUE
# Prints a label/value pair; in colour mode the label is dimmed and padded to
# 14 characters and the value is bold.
ui_metric() {
    local label="$1"
    local value="$2"
    if ui__color_enabled; then
        ui__dim; printf ' %-14s' "$label"; ui__reset
        ui__bold; printf '%s\n' "$value"; ui__reset
    else
        printf ' %s %s\n' "$label" "$value"
    fi
}

# ui_spinner_cleanup
# Stops the background spinner, if one is running, and erases its line on
# stderr. Does nothing when UI_SPINNER_PID is empty.
# Globals: UI_SPINNER_PID (read, cleared)
ui_spinner_cleanup() {
    if [ -z "$UI_SPINNER_PID" ]; then
        return
    fi
    kill "$UI_SPINNER_PID" 2>/dev/null || true
    wait "$UI_SPINNER_PID" 2>/dev/null || true
    UI_SPINNER_PID=""
    printf '\r\033[K' >&2
}

# ui_spinner_start MESSAGE
# Colour mode: starts a background spinner on stderr labelled MESSAGE,
# replacing any spinner already running. Plain mode: prints MESSAGE once.
# Globals: UI_SPINNER_PID (written)
ui_spinner_start() {
    local message="$1"
    if ! ui__color_enabled; then
        echo "$message"
        return
    fi
    ui_spinner_cleanup
    (
        local spin='|/-\'
        local i=0
        while true; do
            ui__cyn
            printf '\r %s %s' "${spin:i%4:1}" "$message"
            ui__reset
            i=$((i + 1))
            sleep 0.12
        done
    ) >&2 &
    UI_SPINNER_PID=$!
}

# ui_spinner_stop MESSAGE [STATUS]
#   MESSAGE  label of the step that finished
#   STATUS   exit status of the step (default 0)
# Stops the spinner and reports the outcome through ui__glyph_status in both
# modes, so a failed step is also announced ("ERROR: MESSAGE") when colour is
# off. Returns STATUS.
ui_spinner_stop() {
    local message="$1"
    local status="${2:-0}"

    # No-op when no spinner was started, which is always the case in plain mode.
    ui_spinner_cleanup
    ui__glyph_status "$status" "$message"
    return "$status"
}

# ui_spinner_run MESSAGE COMMAND [ARG...]
# Runs COMMAND with the spinner shown and returns COMMAND's exit status.
ui_spinner_run() {
    local message="$1"
    shift
    ui_spinner_start "$message"
    "$@"
    local status=$?
    ui_spinner_stop "$message" "$status"
    return "$status"
}

# ui__finish_box TITLE EMOJI PATH DETAIL
# Closing summary. Colour mode: boxed TITLE, then EMOJI + PATH, then DETAIL
# dimmed. Plain mode: "TITLE: PATH" followed by DETAIL.
ui__finish_box() {
    local title="$1"
    local emoji="$2"
    local path="$3"
    local detail="$4"

    if ui__color_enabled; then
        ui__mag; ui__bold
        printf '\n ╭───────────────────────────────────────────────────────────╮\n'
        # 2 spaces + 57-character field = the 59-column interior of the box.
        printf ' │  %-57s│\n' "$title"
        printf ' ╰───────────────────────────────────────────────────────────╯\n'
        ui__reset
        ui__grn; printf ' %s ' "$emoji"; ui__reset
        echo "$path"
        ui__dim; printf ' %s\n' "$detail"; ui__reset
    else
        echo "$title: $path"
        echo "$detail"
    fi
}

# ui_finish PATH SIZE
# Summary for a packed archive; the detail line shows a shell-quoted
# `tar -tzf` command for PATH.
ui_finish() {
    local path="$1"
    local size="$2"
    ui__finish_box "ARCHIVE READY" '📦' "$path" "$size bytes · tar -tzf $(printf '%q' "$path")"
}

# ui_finish_dir PATH SIZE FILES
# Summary for an unpacked capture directory.
ui_finish_dir() {
    local path="$1"
    local size="$2"
    local files="$3"
    ui__finish_box "RUN CAPTURE READY" '📂' "$path" "$size bytes · $files files · start with run-context.txt"
}

# ui_finish_report PATH SIZE
# Summary for a comparison report.
ui_finish_report() {
    local path="$1"
    local size="$2"
    ui__finish_box "COMPARISON READY" '📊' "$path" "$size bytes · share with an LLM or open in your editor"
}

# Patch residue, kept byte-for-byte: the diff header that opens itests/kt.sh.
# The no-op here-document keeps it intact without it being parsed as shell.
: <<'PATCH_RESIDUE'
diff --git a/itests/kt.sh b/itests/kt.sh new file mode 100755 index 000000000..312cb574f --- /dev/null +++ b/itests/kt.sh @@ -0,0 +1,259 @@
PATCH_RESIDUE
#!/bin/bash
################################################################################
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################

# ASCII art and version
# Prints the tool banner and version lines on stdout.
print_header() {
    cat << "EOF"
 _  __               __   _____           _
| |/ /              / _| |_   _|         | |
| ' / __ _ _ __ __ _| |_    | | ___   ___ | |___
|  < / _` | '__/ _` |  _|   | |/ _ \ / _ \| / __|
| . \ (_| | | | (_| | |     | | (_) | (_) | \__ \
|_|\_\__,_|_|  \__,_|_|     \_/\___/ \___/|_|___/

EOF
    echo "--------------------------------------------------"
    echo "Karaf Test Tools (kt.sh) - Version 1.0.0"
    echo "A utility for managing Apache Karaf test instances"
    echo "--------------------------------------------------"
}

# Function to get file modification time in a cross-platform way
# Arguments: $1 - path
# Outputs:   "<mtime in epoch seconds> <path>" (BSD stat on macOS, stat -c elsewhere)
get_file_mtime() {
    if [[ "$OSTYPE" == "darwin"* ]]; then
        # macOS
        stat -f "%m %N" "$1"
    else
        # Linux and others
        stat -c "%Y %n" "$1"
    fi
}

# Function to find the latest Karaf directory
# Looks at the direct children of target/exam (relative to the current
# directory) whose name matches *-*-*-*-* and prints the most recently
# modified one.
# Returns: 0 and the path on stdout; 1 with a message on stderr when
#          target/exam is missing or holds no matching directory.
find_karaf_dir() {
    if [ ! -d "target/exam" ]; then
        echo "Error: Directory target/exam does not exist" >&2
        return 1
    fi

    local latest_time=0
    local latest_dir=""
    local dir stat_output mtime

    while IFS= read -r dir; do
        stat_output="$(get_file_mtime "$dir")" || continue
        mtime="${stat_output%% *}"
        # Skip entries whose mtime could not be read; an empty or non-numeric
        # value would make the arithmetic comparison below fail.
        case "$mtime" in
            '' | *[!0-9]*) continue ;;
        esac
        if (( mtime > latest_time )); then
            latest_time=$mtime
            latest_dir=$dir
        fi
    done < <(find target/exam -maxdepth 1 -type d -name "*-*-*-*-*" 2>/dev/null)

    if [ -z "$latest_dir" ]; then
        echo "Error: No Karaf test directory found in target/exam" >&2
        return 1
    fi
    echo "$latest_dir"
}

# Function to find the latest Karaf log file
# Outputs: path of data/log/karaf.log inside the latest Karaf directory.
# Returns: 1 when the directory lookup fails or the log file does not exist.
find_karaf_log() {
    # Declared separately from the assignment: `local v=$(cmd)` reports the
    # status of `local`, which hides a failing lookup.
    local karaf_dir log_file
    karaf_dir="$(find_karaf_dir)" || return 1
    log_file="$karaf_dir/data/log/karaf.log"
    if [ ! -f "$log_file" ]; then
        echo "Error: Karaf log file not found at $log_file" >&2
        return 1
    fi
    echo "$log_file"
}

# Function to find the Karaf executable
# Outputs: path of bin/karaf inside the latest Karaf directory.
# Returns: 1 when the directory lookup fails or the file does not exist.
find_karaf_exec() {
    local karaf_dir karaf_exec
    karaf_dir="$(find_karaf_dir)" || return 1
    karaf_exec="$karaf_dir/bin/karaf"
    if [ ! -f "$karaf_exec" ]; then
        echo "Error: Karaf executable not found at $karaf_exec" >&2
        return 1
    fi
    echo "$karaf_exec"
}

# Function to check if Karaf is running
# Returns: 0 when data/karaf.pid exists in the latest Karaf directory and names
#          a process that `ps -p` can see; 1 otherwise.
is_karaf_running() {
    local karaf_dir pid_file pid
    karaf_dir="$(find_karaf_dir)" || return 1
    pid_file="$karaf_dir/data/karaf.pid"
    if [ ! -f "$pid_file" ]; then
        return 1
    fi
    pid="$(cat "$pid_file")"
    # An empty pid file would otherwise make ps print a usage error.
    if [ -n "$pid" ] && ps -p "$pid" > /dev/null 2>&1; then
        return 0
    fi
    return 1
}

# Prints the banner followed by the help text on stdout.
usage() {
    # Script name without its directory; no word splitting when $0 has spaces.
    local prog="${0##*/}"

    print_header
    cat << EOF

DESCRIPTION
    This script helps manage and inspect Karaf test instances during integration
    testing. It automatically finds the most recent test instance and provides
    convenient commands for viewing logs and navigating directories.

USAGE
    $prog COMMAND [ARGS]

COMMANDS
    start, s     Start the Karaf instance
                 Launches Karaf in the background

    debug, d     Start Karaf in debug mode
                 Launches Karaf with JPDA debugging enabled on port 5005

    console, c   Start Karaf in console mode
                 Launches Karaf in foreground with direct console access

    stop, x      Stop the running Karaf instance
                 Gracefully shuts down the Karaf container

    log, l       View the latest Karaf log file using less
                 Useful for scrolling through the entire log file

    tail, t      Tail the current Karaf log file
                 Follows the log in real-time, great for watching test execution

    grep, g      Grep the latest Karaf log file
                 Searches for patterns in the log file
                 Example: kt.sh grep "ERROR" finds all error messages

    dir, i       Print the latest Karaf directory path
                 Shows the full path to the most recent test instance

    pushd, p     Change to the latest Karaf directory using pushd
                 Quickly navigate to the test instance directory
                 Use 'popd' to return to the previous directory

    help, h      Show this help message

EXAMPLES
    $prog start        # Start Karaf instance
    $prog debug        # Start Karaf in debug mode
    $prog console      # Start Karaf with direct console access
    $prog stop         # Stop Karaf instance
    $prog log          # View complete log with less
    $prog tail         # Watch log updates in real-time
    $prog grep ERROR   # Find all ERROR messages in log
    $prog dir          # Show path to latest test instance
    $prog pushd        # Jump to test instance directory

TIPS
    - The script automatically finds the most recent test instance
    - All commands have short aliases (single letter) for quick access
    - Use 'tail' during test execution to monitor progress
    - Use 'grep' to search for specific test failures or errors
    - Debug mode allows remote debugging on port 5005
EOF
}

# Main command processing
# A failed lookup has already printed its own error; each branch exits
# non-zero in that case instead of falling through with exit status 0.
case "${1:-help}" in
    start|s)
        if is_karaf_running; then
            echo "Error: Karaf is already running" >&2
            exit 1
        fi
        karaf_exec="$(find_karaf_exec)" || exit 1
        echo "Starting Karaf..."
        if ! "$karaf_exec" start; then
            echo "Error: Karaf failed to start" >&2
            exit 1
        fi
        echo "Karaf started. Use 'tail' command to follow the logs."
        ;;
    console|c)
        if is_karaf_running; then
            echo "Error: Karaf is already running" >&2
            exit 1
        fi
        karaf_exec="$(find_karaf_exec)" || exit 1
        echo "Starting Karaf in console mode..."
        exec "$karaf_exec"
        ;;
    debug|d)
        if is_karaf_running; then
            echo "Error: Karaf is already running" >&2
            exit 1
        fi
        karaf_exec="$(find_karaf_exec)" || exit 1
        echo "Starting Karaf in debug mode (port 5005)..."
        KARAF_DEBUG=true "$karaf_exec" debug
        echo "Karaf started in debug mode. Connect debugger to port 5005."
        ;;
    stop|x)
        if ! is_karaf_running; then
            echo "Error: Karaf is not running" >&2
            exit 1
        fi
        karaf_exec="$(find_karaf_exec)" || exit 1
        echo "Stopping Karaf..."
        if ! "$karaf_exec" stop; then
            echo "Error: Karaf failed to stop" >&2
            exit 1
        fi
        echo "Karaf stopped."
        ;;
    log|l)
        log_file="$(find_karaf_log)" || exit 1
        less "$log_file"
        ;;
    tail|t)
        log_file="$(find_karaf_log)" || exit 1
        tail -f "$log_file"
        ;;
    grep|g)
        if [ -z "$2" ]; then
            echo "Error: grep pattern required" >&2
            exit 1
        fi
        log_file="$(find_karaf_log)" || exit 1
        # "--" keeps a pattern that starts with "-" from being read as an option.
        grep -- "$2" "$log_file"
        ;;
    dir|i)
        find_karaf_dir
        ;;
    pushd|p)
        # NOTE(review): pushd changes the directory of the shell running this
        # script; when kt.sh is executed rather than sourced, the caller's
        # working directory is unaffected. Confirm the intended usage.
        karaf_dir="$(find_karaf_dir)" || exit 1
        pushd "$karaf_dir" > /dev/null && echo "Changed to: $PWD"
        ;;
    help|h|*)
        usage
        ;;
esac

# Patch residue, kept byte-for-byte: the diff header and the first lines of
# itests/llm-it-run-analysis-guide.md. The no-op here-document keeps that text
# intact without it being parsed as shell.
: <<'PATCH_RESIDUE'
diff --git a/itests/llm-it-run-analysis-guide.md b/itests/llm-it-run-analysis-guide.md new file mode 100644 index 000000000..6fd3bfb7a --- /dev/null +++ b/itests/llm-it-run-analysis-guide.md @@ -0,0 +1,109 @@ +# LLM analysis guide — Apache Unomi integration test archive + +Read this file **before** analyzing Karaf logs or exceptions in this archive. + +**Start with `run-context.txt`** for operator notes, build/Maven options (search engine, heap sizes), and a system snapshot.
PATCH_RESIDUE
+ +**Compare multiple runs** with `test-results.tsv` (per-test PASS/FAIL/ERROR) and `archives/runs-index.tsv` (one row per capture). Run: + +```bash +./compare-it-runs.sh --last 3 +``` + +## What actually failed + +**Source of truth for test pass/fail:** `failsafe-reports/` + +1. `failsafe-reports/failsafe-summary.xml` — counts (failures, errors, skipped) +2. `failsafe-reports/org.apache.unomi.itests.AllITs.txt` — human-readable failure list and stack traces +3. `failsafe-reports/TEST-org.apache.unomi.itests.AllITs.xml` — structured JUnit XML + +If a test is **green** in failsafe reports, do **not** treat related Karaf ERROR/WARN lines as regressions. + +## Expected noise in Karaf logs (failure hardening) + +Many integration tests **intentionally** trigger invalid requests, bad schemas, or security blocks. The server logs ERROR/WARN/Exception lines for those scenarios. That is normal. + +The project encodes allowed log noise in `LogChecker.createLogChecker()` overrides (`InputValidationIT`, `JSONSchemaIT`, `CopyPropertiesActionIT`, …). 
This archive includes: + +- **`expected-karaf-log-patterns.txt`** — substring patterns extracted from those tests (keep in sync with Java) +- **`karaf-unexpected-candidates.log`** (if present) — ERROR/WARN/Exception lines that **did not** match any expected pattern + +### How to use `expected-karaf-log-patterns.txt` + +| Prefix | Meaning | +|--------|---------| +| `SUBSTRING: <text>` | If a log line **contains** `<text>`, treat it as **expected** (ignore for root-cause analysis) | +| `MULTIPART: a \| b \| c` | All parts must appear on the **same line**, in order — also expected | + +### Common intentional scenarios (not bugs by themselves) + +| Area | Examples in logs | Tests | +|------|------------------|-------| +| JSON Schema validation | `Schema not found for event type: dummy`, `Validation error`, `JsonSchemaException` | `JSONSchemaIT` | +| REST input validation | `Response status code: 400`, `Invalid parameter`, `InvalidRequestExceptionMapper` | `InputValidationIT` | +| Dummy fixtures | `dummy_scope`, `event type: dummy`, `dummy_workspace` | Schema/validation ITs | +| Copy property edge cases | `Impossible to copy the property` | `CopyPropertiesActionIT` | +| Security / auth probes | HTTP 401/403, rejected scripting payloads | `GraphQLServletSecurityIT`, `ContextServletIT` | + +### Analysis workflow + +1. Start from **failsafe failures** — list failing test methods only. +2. For each failure, read its stack trace in `AllITs.txt` / XML. +3. Open **`exam/.../karaf-triage-summary.txt`** for artifact priority and top recurring errors. +4. Read **`karaf-failure-correlation.log`** when `failed-tests.txt` is non-empty — server excerpts near failing class/method anchors. +5. Use **`karaf-unexpected-candidates.log`** for novel server-side clues. Blocks use `--- block N (lines X-Y) ---` with merged context and full stack traces (ANSI stripped). Rolled logs (`karaf.log.1`, …) are merged; see `karaf-log-segments.txt`. +6. 
**Ignore** lines in `expected-karaf-log-patterns.txt` unless they explain a **failing** test. Also ignore **BundleWatcher** startup WARN (LogChecker fast path). +7. Do **not** report "many exceptions in karaf.log" if failsafe shows fewer failures and exceptions match expected hardening patterns. + +## Comparing runs (systematic vs flaky) + +When analyzing **multiple** captures: + +1. Read `archives/runs-index.tsv` for a timeline (search engine, heaps, failure counts, operator notes). +2. For each run, open `test-results.tsv` — columns: `test_class`, `test_method`, `status`, `elapsed_s`. +3. Open `comparison-last-3.txt` in this capture (auto-generated when 2+ archives exist), or `archives/latest-comparison.txt`, or run `./compare-it-runs.sh --last N`. + +| Classification | Meaning | +|----------------|---------| +| **Systematic** | FAIL/ERROR in **every** run compared — likely a real bug or broken assumption | +| **Flaky** | Mixed PASS and FAIL/ERROR across runs — timing, resource pressure, or ordering | +| **Regression** | PASS in an earlier run, FAIL/ERROR in the latest | +| **Fixed** | FAIL/ERROR earlier, PASS in the latest | + +Correlate flaky tests with `operator.note` and `run-context.txt` system snapshots (swap, load, heap). 
+ +## File map + +| File | Use | +|------|-----| +| `failsafe-reports/` | Test results (primary) | +| `surefire-reports/` | Small unit tests in itests module (pre-IT) | +| `exam/.../karaf-triage-summary.txt` | **Start here for logs** — volume stats, artifact priority, top recurring errors | +| `exam/.../karaf-failure-correlation.log` | Karaf excerpts near failing tests (from `failed-tests.txt` anchors) | +| `exam/.../karaf-exception-index.tsv` | Ranked recurring exception/error messages (`rank`, `count`, `first_line`, `sample`) | +| `exam/.../karaf-recent.log` | Full merged log (≤20k lines) or tail for timing context | +| `exam/.../karaf-log-segments.txt` | Rollover segment order when `karaf.log.1`, `.2`, … exist (16MB roll policy) | +| `exam/.../karaf.log[.N]` | Full segment files only with `--full-karaf` | +| `exam/.../karaf-errors-warnings.log` | Merged ERROR/WARN blocks (**15+15** context, extended through stack traces; overlaps merged) | +| `exam/.../karaf-unexpected-candidates.log` | Blocks where triggers are **not** globally ignored and **not** in expected patterns | +| `test-results.tsv` | Per-test PASS/FAIL/ERROR manifest for cross-run diff | +| `run-summary.properties` | Machine-readable run metadata (engine, heaps, counts, `run.fingerprint`) | +| `failed-tests.txt` | One failing test per line (`ClassName.method`) | +| `archives/runs-index.tsv` | Index of all captures under `archives/` | +| `comparison-last-3.txt` | Auto compare of last 3 captures (systematic / flaky / regression / fixed) | +| `archives/latest-comparison.txt` | Same report, always overwritten on each archive | +| `run-context.txt` | **Start here** — operator notes, build/Maven trace, inferred options, system snapshot | +| `run-config/it-run-trace.properties` | Raw trace from `./build.sh --integration-tests` (if present) | +| `expected-karaf-log-patterns.txt` | Ignore list for hardening tests | +| `run-config/*-port.properties` | Search engine Docker port mappings | +| `manifest.txt` | 
Capture metadata | + +## Output format + +When reporting findings: + +- Separate **confirmed test failures** (from failsafe) from **informational log noise** +- Cite the failing test class and method name +- Quote only **unexpected** log lines (or lines tied to a failing test) +- Suggest fixes only for failures that are not explained by intentional hardening
