This is an automated email from the ASF dual-hosted git repository.

epugh pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 13fd8ad7e4e Test rolling upgrade of Solr using Docker and BATS (#3706)
13fd8ad7e4e is described below

commit 13fd8ad7e4e0c57c371edbca1576f7d903dafcda
Author: David Smiley <[email protected]>
AuthorDate: Tue Feb 3 19:35:19 2026 -0500

    Test rolling upgrade of Solr using Docker and BATS (#3706)
    
    Demonstrates moving between two versions of Solr.
    
    Co-authored-by: copilot-swe-agent[bot] 
<[email protected]>
    Co-authored-by: dsmiley <[email protected]>
    Co-authored-by: Eric Pugh <[email protected]>
---
 solr/packaging/build.gradle                   |  11 +-
 solr/packaging/test/bats_helper.bash          |  48 +++----
 solr/packaging/test/test_extraction.bats      |  36 ++++--
 solr/packaging/test/test_rolling_upgrade.bats | 172 ++++++++++++++++++++++++++
 solr/packaging/test/test_start_solr.bats      |   2 +
 5 files changed, 221 insertions(+), 48 deletions(-)

diff --git a/solr/packaging/build.gradle b/solr/packaging/build.gradle
index 0e260f6c9ee..759528e7550 100644
--- a/solr/packaging/build.gradle
+++ b/solr/packaging/build.gradle
@@ -327,11 +327,14 @@ class BatsTask extends Exec {
   protected void exec() {
     executable "$project.ext.nodeProjectDir/node_modules/bats/bin/bats"
 
+    def batsArgs = []
+    if (logger.isInfoEnabled()) {
+      batsArgs << '--verbose-run'
+    }
+    batsArgs += ['-T', '--print-output-on-failure', '--report-formatter', 
'junit', '--output', "$project.buildDir/test-output"]
     // Note: tests to run must be listed after all other arguments
-    // Additional debugging output: -x, --verbose-run
-    setArgs(['-T', '--print-output-on-failure', '--report-formatter', 'junit', 
'--output', "$project.buildDir/test-output"] +
-        (testFiles.empty ? testDir : testFiles))
-
+    batsArgs += testFiles.empty ? [testDir] : testFiles
+    setArgs(batsArgs)
     super.exec()
   }
 }
diff --git a/solr/packaging/test/bats_helper.bash 
b/solr/packaging/test/bats_helper.bash
index db475a10d0f..d684724a2e5 100644
--- a/solr/packaging/test/bats_helper.bash
+++ b/solr/packaging/test/bats_helper.bash
@@ -97,41 +97,21 @@ collection_exists() {
   return 1
 }
 
-# Wait for a collection to be queryable
-wait_for_collection() {
-  local collection="$1"
-  local timeout=${2:-180}
-  local start_ts
-  start_ts=$(date +%s)
-  while true; do
-    if curl -s -S -f 
"http://localhost:${SOLR_PORT}/solr/${collection}/select?q=*:*"; | grep -q 
'"responseHeader"'; then
+# Utility function to retry a command until it succeeds or times out
+wait_for() {
+  local timeout="${1:-30}"     # Default 30 seconds timeout
+  local interval="${2:-1}"     # Default 1 second between retries
+  shift 2                      # Remove timeout and interval from args
+  local command=("$@")         # Remaining args are the command to execute
+  
+  local end_time=$(($(date +%s) + timeout))
+  
+  while [ $(date +%s) -lt $end_time ]; do
+    if "${command[@]}"; then
       return 0
     fi
-    local now
-    now=$(date +%s)
-    if [ $(( now - start_ts )) -ge ${timeout} ]; then
-      echo "Timed out waiting for collection '${collection}' to become 
queryable" >&2
-      return 1
-    fi
-    sleep 3
+    sleep "$interval"
   done
-}
-
-# Apply the ExtractingRequestHandler via Config API and print error body on 
failure
-apply_extract_handler() {
-  local collection="$1"
-  local 
json="{\"add-requesthandler\":{\"name\":\"/update/extract\",\"class\":\"org.apache.solr.handler.extraction.ExtractingRequestHandler\",\"tikaserver.url\":\"http://localhost:${TIKA_PORT}\",\"defaults\":{\"lowernames\":\"true\",\"captureAttr\":\"true\"}}}";
-  local url="http://localhost:${SOLR_PORT}/solr/${collection}/config";
-  # Capture body and status code
-  local resp code body
-  sleep 5
-  resp=$(curl -s -S -w "\n%{http_code}" -X POST -H 
'Content-type:application/json' -d "$json" "$url")
-  code="${resp##*$'\n'}"
-  body="${resp%$'\n'*}"
-  if [ "$code" = "200" ]; then
-    return 0
-  else
-    echo "Config API error applying ExtractingRequestHandler to ${collection} 
(HTTP ${code}): ${body}" >&3
-    return 1
-  fi
+  
+  return 1  # Timeout reached
 }
diff --git a/solr/packaging/test/test_extraction.bats 
b/solr/packaging/test/test_extraction.bats
index 865298331a0..12c4b72840a 100644
--- a/solr/packaging/test/test_extraction.bats
+++ b/solr/packaging/test/test_extraction.bats
@@ -18,6 +18,25 @@
 
 load bats_helper
 
+# Apply the ExtractingRequestHandler via Config API and print error body on 
failure
+apply_extract_handler() {
+  local collection="$1"
+  local 
json="{\"add-requesthandler\":{\"name\":\"/update/extract\",\"class\":\"org.apache.solr.handler.extraction.ExtractingRequestHandler\",\"tikaserver.url\":\"http://localhost:${TIKA_PORT}\",\"defaults\":{\"lowernames\":\"true\",\"captureAttr\":\"true\"}}}";
+  local url="http://localhost:${SOLR_PORT}/solr/${collection}/config";
+  # Capture body and status code
+  local resp code body
+  sleep 5
+  resp=$(curl -s -S -w "\n%{http_code}" -X POST -H 
'Content-type:application/json' -d "$json" "$url")
+  code="${resp##*$'\n'}"
+  body="${resp%$'\n'*}"
+  if [ "$code" = "200" ]; then
+    return 0
+  else
+    echo "Config API error applying ExtractingRequestHandler to ${collection} 
(HTTP ${code}): ${body}" >&3
+    return 1
+  fi
+}
+
 setup_file() {
   if command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1; then
     export TIKA_PORT=$((SOLR_PORT+5))
@@ -25,7 +44,7 @@ setup_file() {
     echo "Tika Server started on port ${TIKA_PORT}" >&3
   else
     export DOCKER_UNAVAILABLE=1
-    echo "WARNING: Docker not available (CLI missing or daemon not running); 
Tika-dependent tests will be bypassed and marked as passed." >&3
+    echo "WARNING: Docker not available (CLI missing or daemon not running); 
Tika-dependent tests will be bypassed." >&3
   fi
 }
 
@@ -51,8 +70,7 @@ teardown() {
 @test "using curl to extract a single pdf file" {
 
   if [ -n "${DOCKER_UNAVAILABLE:-}" ]; then
-    echo "WARNING: Docker not available; bypassing test." >&3
-    return 0
+    skip "Docker is not available"
   fi
 
   # Disable security manager to allow extraction
@@ -61,7 +79,7 @@ teardown() {
   solr start -Dsolr.modules=extraction
 
   solr create -c gettingstarted -d _default
-  wait_for_collection gettingstarted 30
+  wait_for 30 3 curl -s -S -f 
"http://localhost:${SOLR_PORT}/solr/gettingstarted/select?q=*:*"; -o /dev/null
   apply_extract_handler gettingstarted
 
   curl 
"http://localhost:${SOLR_PORT}/solr/gettingstarted/update/extract?literal.id=doc1&commit=true";
 -F "myfile=@${SOLR_TIP}/example/exampledocs/solr-word.pdf"
@@ -73,8 +91,7 @@ teardown() {
 @test "using the bin/solr post tool to extract content from pdf" {
 
   if [ -n "${DOCKER_UNAVAILABLE:-}" ]; then
-    echo "WARNING: Docker not available; bypassing test." >&3
-    return 0
+    skip "Docker is not available"
   fi
 
   # Disable security manager to allow extraction
@@ -83,7 +100,7 @@ teardown() {
   solr start -Dsolr.modules=extraction
 
   solr create -c content_extraction -d _default
-  wait_for_collection content_extraction 30
+  wait_for 30 3 curl -s -S -f 
"http://localhost:${SOLR_PORT}/solr/content_extraction/select?q=*:*"; -o 
/dev/null
   apply_extract_handler content_extraction
 
   # We filter to pdf to invoke the Extract handler.
@@ -99,8 +116,7 @@ teardown() {
 @test "using the bin/solr post tool to crawl web site" {
 
   if [ -n "${DOCKER_UNAVAILABLE:-}" ]; then
-    echo "WARNING: Docker not available; bypassing test." >&3
-    return 0
+    skip "Docker is not available"
   fi
 
   # Disable security manager to allow extraction
@@ -109,7 +125,7 @@ teardown() {
   solr start -Dsolr.modules=extraction
 
   solr create -c website_extraction -d _default
-  wait_for_collection website_extraction 30
+  wait_for 30 3 curl -s -S -f 
"http://localhost:${SOLR_PORT}/solr/website_extraction/select?q=*:*"; -o 
/dev/null
   apply_extract_handler website_extraction
 
   # Change to --recursive 1 to crawl multiple pages, but may be too slow.
diff --git a/solr/packaging/test/test_rolling_upgrade.bats 
b/solr/packaging/test/test_rolling_upgrade.bats
new file mode 100644
index 00000000000..3902189eeb3
--- /dev/null
+++ b/solr/packaging/test/test_rolling_upgrade.bats
@@ -0,0 +1,172 @@
+#!/usr/bin/env bats
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+load bats_helper
+
+# You can test alternative images via 
+# export SOLR_BEGIN_IMAGE="apache/solr-nightly:9.9.0-slim" and then running
+# ./gradlew iTest --tests test_rolling_upgrade.bats
+SOLR_BEGIN_IMAGE="${SOLR_BEGIN_IMAGE:-apache/solr-nightly:9.10.0-SNAPSHOT-slim}"
+SOLR_END_IMAGE="${SOLR_END_IMAGE:-apache/solr-nightly:10.0.0-SNAPSHOT-slim}"
+
+setup() {
+  common_clean_setup
+
+  # Pre-checks
+  if ! command -v docker >/dev/null 2>&1 || ! docker info >/dev/null 2>&1; then
+    skip "Docker is not available"
+  fi
+  docker pull "$SOLR_BEGIN_IMAGE" || skip "Docker image $SOLR_BEGIN_IMAGE is 
not available"
+  docker pull "$SOLR_END_IMAGE" || skip "Docker image $SOLR_END_IMAGE is not 
available"
+
+  # Record test start time for scoping logs on failure
+  TEST_STARTED_AT_ISO=$(date -Iseconds)
+  export TEST_STARTED_AT_ISO
+
+  # Persist artifacts under Gradle’s test-output
+  ARTIFACT_DIR="${TEST_OUTPUT_DIR}/docker"
+  mkdir -p "$ARTIFACT_DIR"
+  export ARTIFACT_DIR
+}
+
+teardown() {
+  failed=$([[ -z "${BATS_TEST_COMPLETED:-}" ]] && [[ -z 
"${BATS_TEST_SKIPPED:-}" ]] && echo 1 || echo 0)
+  if [[ "$failed" -eq 1 ]]; then
+    echo "# Test failed - capturing Docker diagnostics" >&3
+    echo "# === docker ps (summary) ===" >&3
+    docker ps -a --format 'table 
{{.Names}}\t{{.Status}}\t{{.Image}}\t{{.Ports}}' >&3 2>&3 || true
+  fi
+
+  for container in solr-node1 solr-node2 solr-node3; do
+    if docker ps -a --format '{{.Names}}' | grep -q "^${container}$" 
2>/dev/null; then
+      if [[ "$failed" -eq 1 ]]; then
+        echo "# === Docker logs for $container ===" >&3
+        docker logs --timestamps --since "$TEST_STARTED_AT_ISO" "$container" 
>&3 2>&3 || echo "# Failed to get logs for $container" >&3
+        echo "# === Docker inspect for $container ===" >&3
+        docker inspect "$container" | jq '.[] | {Name: .Name, State: .State, 
Ports: .NetworkSettings.Ports}' >&3 2>&3 || true
+      fi
+      # Persist artifacts
+      docker logs --timestamps "$container" >"$ARTIFACT_DIR/${container}.log" 
2>&1 || true
+      docker inspect "$container" >"$ARTIFACT_DIR/${container}.inspect.json" 
2>&1 || true
+      docker exec "$container" ps aux >"$ARTIFACT_DIR/${container}.ps.txt" 
2>&1 || true
+    fi
+  done
+
+  echo "# Docker artifacts saved to: $ARTIFACT_DIR" >&3
+
+  docker stop solr-node1 solr-node2 solr-node3 2>/dev/null || true
+  docker rm solr-node1 solr-node2 solr-node3 2>/dev/null || true
+  docker volume rm solr-data1 solr-data2 solr-data3 2>/dev/null || true
+  docker network rm solrcloud-test 2>/dev/null || true
+}
+
+@test "Docker SolrCloud rolling upgrade" {
+  # Networking & volumes
+  docker network create solrcloud-test
+  docker volume create solr-data1
+  docker volume create solr-data2
+  docker volume create solr-data3
+
+  echo "Starting solr-node1 with embedded ZooKeeper"
+  docker run --name solr-node1 -d \
+    --network solrcloud-test \
+    --memory=400m \
+    --platform linux/amd64 \
+    -v solr-data1:/var/solr \
+    "$SOLR_BEGIN_IMAGE" solr start -f -c -m 200m --host solr-node1 -p 8983
+  docker exec solr-node1 solr assert --started http://solr-node1:8983 
--timeout 10000
+
+  # start next 2 in parallel
+
+  echo "Starting solr-node2 connected to first node's ZooKeeper"
+  docker run --name solr-node2 -d \
+    --network solrcloud-test \
+    --memory=400m \
+    --platform linux/amd64 \
+    -v solr-data2:/var/solr \
+    "$SOLR_BEGIN_IMAGE" solr start -f -c -m 200m --host solr-node2 -p 8984 -z 
solr-node1:9983
+
+  echo "Starting solr-node3 connected to first node's ZooKeeper"
+  docker run --name solr-node3 -d \
+    --network solrcloud-test \
+    --memory=400m \
+    --platform linux/amd64 \
+    -v solr-data3:/var/solr \
+    "$SOLR_BEGIN_IMAGE" solr start -f -c -m 200m --host solr-node3 -p 8985 -z 
solr-node1:9983
+
+  docker exec solr-node2 solr assert --started http://solr-node2:8984 
--timeout 30000
+  docker exec solr-node3 solr assert --started http://solr-node3:8985 
--timeout 30000
+
+  echo "Creating a Collection"
+  docker exec --user=solr solr-node1 solr create -c test-collection -n 
techproducts --shards 3
+
+  echo "Checking collection health"
+  wait_for 30 1 docker exec solr-node1 solr healthcheck -c test-collection
+
+  echo "Add some sample data"
+  docker exec --user=solr solr-node1 solr post -c test-collection 
example/exampledocs/mem.xml
+  assert_success
+
+  # Begin rolling upgrade - upgrade node 3 first (reverse order: 3, 2, 1)
+  echo "Starting rolling upgrade - upgrading node 3"
+  docker stop solr-node3
+  docker rm solr-node3
+  docker run --name solr-node3 -d \
+    --network solrcloud-test \
+    --memory=400m \
+    --platform linux/amd64 \
+    -v solr-data3:/var/solr \
+    "$SOLR_END_IMAGE" solr start -f -m 200m --host solr-node3 -p 8985 -z 
solr-node1:9983
+  docker exec solr-node3 solr assert --started http://solr-node3:8985 
--timeout 30000
+  assert_success
+
+  # Upgrade node 2 second
+  echo "Upgrading node 2"
+  docker stop solr-node2
+  docker rm solr-node2
+  docker run --name solr-node2 -d \
+    --network solrcloud-test \
+    --memory=400m \
+    --platform linux/amd64 \
+    -v solr-data2:/var/solr \
+    "$SOLR_END_IMAGE" solr start -f -m 200m --host solr-node2 -p 8984 -z 
solr-node1:9983
+  docker exec solr-node2 solr assert --started http://solr-node2:8984 
--timeout 30000
+  assert_success
+
+  echo "Upgrading node 1 (ZK node)"
+  docker stop solr-node1
+  docker rm solr-node1
+  docker run --name solr-node1 -d \
+    --network solrcloud-test \
+    --memory=400m \
+    --platform linux/amd64 \
+    -v solr-data1:/var/solr \
+    "$SOLR_END_IMAGE" solr start -f -m 200m --host solr-node1 -p 8983
+  docker exec solr-node1 solr assert --started http://solr-node1:8983 
--timeout 30000
+  assert_success
+
+  # Final collection health check
+  wait_for 30 1 docker exec solr-node1 solr healthcheck -c test-collection
+
+  echo "checking cluster has exactly 3 live nodes"
+  run docker exec solr-node1 curl -s 
"http://solr-node1:8983/solr/admin/collections?action=CLUSTERSTATUS";
+  assert_success
+  
+  local live_nodes_count=$(echo "$output" | jq -r '.cluster.live_nodes | 
length')
+  assert_equal "$live_nodes_count" "3"
+
+}
diff --git a/solr/packaging/test/test_start_solr.bats 
b/solr/packaging/test/test_start_solr.bats
index 27f1fe9df03..6d978bd146d 100644
--- a/solr/packaging/test/test_start_solr.bats
+++ b/solr/packaging/test/test_start_solr.bats
@@ -60,6 +60,8 @@ teardown() {
   # for start/stop/restart we parse the args separate from picking the command
  # which means you don't get an error message for passing a start arg, like 
--jvm-opts to a stop command.
 
+  # Pre-check: bare `timeout` with no operands always exits non-zero (usage
+  # error), which would skip unconditionally; probe availability instead.
+  command -v timeout >/dev/null || skip "timeout utility is not available"
   # Set a timeout duration (in seconds)
   TIMEOUT_DURATION=2
 

Reply via email to