This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new ceb23e111 NUTCH-3145 Upgrade to JUnit 6 (#883)
ceb23e111 is described below

commit ceb23e111caa8eb079f703a0fca04ca934cfb29f
Author: Lewis John McGibbney <[email protected]>
AuthorDate: Sat Feb 21 08:26:32 2026 -0800

    NUTCH-3145 Upgrade to JUnit 6 (#883)
---
 .github/workflows/master-build.yml                 | 108 ++++++++++-
 build.xml                                          |  20 +-
 default.properties                                 |  12 +-
 docker/Dockerfile                                  |   8 +-
 ivy/ivy.xml                                        |  20 +-
 src/plugin/build-plugin.xml                        |   2 +
 src/test/junit-platform.properties                 |  31 +++
 .../org/apache/nutch/crawl/CrawlDBTestUtil.java    |  41 +++-
 src/test/org/apache/nutch/fetcher/TestFetcher.java |  93 ++++++---
 .../apache/nutch/segment/TestSegmentMerger.java    |  57 ++++--
 .../nutch/util/CancellationAwareTestUtils.java     | 209 +++++++++++++++++++++
 .../org/apache/nutch/util/WritableTestUtils.java   |  13 +-
 12 files changed, 541 insertions(+), 73 deletions(-)

diff --git a/.github/workflows/master-build.yml 
b/.github/workflows/master-build.yml
index d73bb3a69..1fe9da252 100644
--- a/.github/workflows/master-build.yml
+++ b/.github/workflows/master-build.yml
@@ -20,6 +20,16 @@ on:
   pull_request:
     types: [opened, synchronize, reopened]
     branches: [master]
+
+# Java Version Strategy:
+# - BUILD: Requires Java 17+ (JUnit 6 dependency)
+# - RUNTIME: Supports Java 11+ (javac.version=11 produces Java 11 bytecode)
+#
+# The 'build' job verifies bytecode compilation for both Java 11 and 17 targets.
+# The 'runtime-java11' job verifies the built artifacts actually run on Java 11.
+# The 'tests' job runs on JDK 17 (required by JUnit 6) with the default
+# javac.version=11 bytecode target for backward compatibility.
+
 jobs:
   javadoc:
     strategy:
@@ -43,6 +53,7 @@ jobs:
             ${{ runner.os }}-ivy-
       - name: Javadoc
         run: ant clean javadoc -buildfile build.xml
+
   rat:
     strategy:
       matrix:
@@ -73,19 +84,108 @@ jobs:
       - name: Fail if any unknown licenses
         if: ${{ env.UNKNOWN_LICENSES != '0 Unknown Licenses' }}
         run: exit 1
+
+  # Build verification with Java bytecode target matrix
+  # Verifies bytecode compatibility for both Java 11 and Java 17 targets
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        javac-version: ['11', '17']
+        os: [ubuntu-latest]
+    runs-on: ${{ matrix.os }}
+    name: build (javac.version=${{ matrix.javac-version }})
+    steps:
+      - uses: actions/checkout@v5
+      - name: Set up JDK 17
+        uses: actions/setup-java@v5
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+      - name: Cache Ivy dependencies
+        uses: actions/cache@v4
+        with:
+          path: ~/.ivy2/cache
+          key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 
'src/plugin/**/ivy.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-ivy-
+      - name: Build with javac.version=${{ matrix.javac-version }}
+        run: ant clean runtime -Djavac.version=${{ matrix.javac-version }} 
-buildfile build.xml
+      - name: Verify bytecode version
+        run: |
+          # Extract and verify the bytecode version of compiled classes
+          # Java 11 = major version 55, Java 17 = major version 61
+          EXPECTED_VERSION=${{ matrix.javac-version == '11' && '55' || '61' }}
+          echo "Expected major version: $EXPECTED_VERSION (Java ${{ 
matrix.javac-version }})"
+          
+          # Find a real class file (exclude package-info.class which may have different version)
+          cd build/classes
+          CLASS_FILE=$(find . -name "*.class" ! -name "package-info.class" | 
head -1)
+          if [ -n "$CLASS_FILE" ]; then
+            echo "Checking: $CLASS_FILE"
+            ACTUAL_VERSION=$(javap -verbose "$CLASS_FILE" 2>/dev/null | grep 
"major version" | awk '{print $NF}')
+            echo "Actual major version: $ACTUAL_VERSION"
+            if [ "$ACTUAL_VERSION" != "$EXPECTED_VERSION" ]; then
+              echo "ERROR: Bytecode version mismatch!"
+              exit 1
+            fi
+            echo "Bytecode version verified successfully"
+          else
+            echo "ERROR: No class files found"
+            exit 1
+          fi
+
+  # Verify runtime compatibility on Java 11
+  # This ensures the built artifacts can actually run on Java 11
+  runtime-java11:
+    needs: build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - name: Set up JDK 17 for building
+        uses: actions/setup-java@v5
+        with:
+          java-version: '17'
+          distribution: 'temurin'
+      - name: Cache Ivy dependencies
+        uses: actions/cache@v4
+        with:
+          path: ~/.ivy2/cache
+          key: ${{ runner.os }}-ivy-${{ hashFiles('ivy/ivy.xml', 
'src/plugin/**/ivy.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-ivy-
+      - name: Build with Java 11 target
+        run: ant clean runtime -Djavac.version=11 -buildfile build.xml
+      - name: Set up JDK 11 for runtime verification
+        uses: actions/setup-java@v5
+        with:
+          java-version: '11'
+          distribution: 'temurin'
+      - name: Verify runtime on Java 11
+        run: |
+          echo "Verifying Nutch can run on Java 11..."
+          java -version
+          cd runtime/local
+          # Actually load Java classes by running showproperties
+          # This invokes org.apache.nutch.tools.ShowProperties and verifies the JAR loads
+          bin/nutch showproperties | head -20
+          echo "Java 11 runtime verification complete"
+
+  # Tests run on JDK 17 (required by JUnit 6) with default javac.version=11
+  # Java 11 runtime compatibility is verified by the runtime-java11 job
   tests:
     strategy:
+      fail-fast: false
       matrix:
-        java: ['17']
         os: [ubuntu-latest, macos-latest]
     runs-on: ${{ matrix.os }}
     timeout-minutes: 45
     steps:
       - uses: actions/checkout@v5
-      - name: Set up JDK ${{ matrix.java }}
+      - name: Set up JDK 17
         uses: actions/setup-java@v5
         with:
-          java-version: ${{ matrix.java }}
+          java-version: '17'
           distribution: 'temurin'
       - name: Cache Ivy dependencies
         uses: actions/cache@v4
@@ -139,4 +239,4 @@ jobs:
           path: |
             ./build/test/TEST-*.xml
             ./build/**/test/TEST-*.xml
-          retention-days: 1
\ No newline at end of file
+          retention-days: 1
diff --git a/build.xml b/build.xml
index 38e549797..277225d24 100644
--- a/build.xml
+++ b/build.xml
@@ -50,9 +50,19 @@
 
   <property name="ant-eclipse.jar" 
value="${ivy.dir}/lib/ant-eclipse-1.0-jvm1.2.jar" />
 
-  <condition property="using.jdk.11">
-    <matches string="${java.version}" pattern="11.+" casesensitive="false" />
-  </condition>
+  <!--
+    Java Version Strategy (see HADOOP-18887 for similar approach):
+    
+    BUILD REQUIREMENT: Java 17+ is required to build Nutch and run unit tests
+    because JUnit 6 (Jupiter) requires Java 17+.
+    
+    RUNTIME COMPATIBILITY: The compiled bytecode targets Java 11 by default
+    (javac.version=11 in default.properties), allowing the binary package
+    to run on Java 11+ environments. This is important for Hadoop clusters
+    that may not yet support Java 17 runtime.
+    
+    To build with Java 17 bytecode target: ant -Djavac.version=17 ...
+  -->
 
   <!-- the normal classpath -->
   <path id="classpath">
@@ -201,7 +211,6 @@
           otherwise the Javascript search is broken,
           see https://bugs.openjdk.org/browse/JDK-8215291
       -->
-      <arg value="--no-module-directories" if:set="using.jdk.11"/>
 
       <packageset dir="${src.dir}"/>
       <packageset dir="${plugins.dir}/creativecommons/src/java"/>
@@ -501,6 +510,7 @@
           <sysproperty key="test.build.data" value="${test.build.data}"/>
           <sysproperty key="test.src.dir" value="${test.src.dir}"/>
           <sysproperty key="test.include.slow" value="${test.include.slow}"/>
+          <sysproperty key="junit.platform.execution.failfast.enabled" 
value="${test.failfast}"/>
           <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" 
value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
         </fork>
         <fileset dir="${test.build.classes}">
@@ -516,6 +526,7 @@
           <sysproperty key="test.build.data" value="${test.build.data}"/>
           <sysproperty key="test.src.dir" value="${test.src.dir}"/>
           <sysproperty key="test.include.slow" value="${test.include.slow}"/>
+          <sysproperty key="junit.platform.execution.failfast.enabled" 
value="${test.failfast}"/>
           <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" 
value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
         </fork>
         <fileset dir="${test.build.classes}">
@@ -653,7 +664,6 @@
           otherwise the Javascript search is broken,
           see https://bugs.openjdk.org/browse/JDK-8215291
       -->
-      <arg value="--no-module-directories" if:set="using.jdk.11"/>
 
       <packageset dir="${src.dir}"/>
       <packageset dir="${plugins.dir}/creativecommons/src/java"/>
diff --git a/default.properties b/default.properties
index 09bfa5594..68a9b304d 100644
--- a/default.properties
+++ b/default.properties
@@ -39,10 +39,14 @@ test.build.data =  ${test.build.dir}/data
 test.build.classes = ${test.build.dir}/classes
 test.build.javadoc = ${test.build.dir}/docs/api
 
+# JUnit 6 fail-fast mode - stop on first test failure (true/false)
+# Enable with: ant test -Dtest.failfast=true
+test.failfast = false
+
 # Proxy Host and Port to use for building JavaDoc
 javadoc.proxy.host=-J-DproxyHost=
 javadoc.proxy.port=-J-DproxyPort=
-javadoc.link.java=https://docs.oracle.com/en/java/javase/11/docs/api/
+javadoc.link.java=https://docs.oracle.com/en/java/javase/17/docs/api/
 javadoc.link.hadoop=https://hadoop.apache.org/docs/r3.4.2/api/
 javadoc.packages=org.apache.nutch.*
 
@@ -53,6 +57,12 @@ bin.dist.version.dir=${dist.dir}/${final.name}-bin
 javac.debug=on
 javac.optimize=on
 javac.deprecation=on
+
+# Java bytecode target version for compiled classes.
+# Set to 11 for backward-compatible runtime (works on Java 11+).
+# Note: Building and running tests requires Java 17+ (JUnit 6 requirement),
+# but the compiled artifacts will run on Java 11+.
+# Override with: ant -Djavac.version=17 to target Java 17 bytecode.
 javac.version=11
 
 runtime.dir=./runtime
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 2eb218bad..93985f228 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -42,12 +42,12 @@ WORKDIR /root/
 
 # Install dependencies
 RUN apk update
-RUN apk --no-cache add apache-ant bash git openjdk11 supervisor
+RUN apk --no-cache add apache-ant bash git openjdk17 supervisor
 
 # Establish environment variables
-RUN echo 'export JAVA_HOME=/usr/lib/jvm/java-11-openjdk' >> $HOME/.bashrc
-RUN echo 'export JAVA_HOME=/usr/lib/jvm/java-11-openjdk' >> $HOME/.ashrc
-ENV JAVA_HOME='/usr/lib/jvm/java-11-openjdk'
+RUN echo 'export JAVA_HOME=/usr/lib/jvm/java-17-openjdk' >> $HOME/.bashrc
+RUN echo 'export JAVA_HOME=/usr/lib/jvm/java-17-openjdk' >> $HOME/.ashrc
+ENV JAVA_HOME='/usr/lib/jvm/java-17-openjdk'
 ENV NUTCH_HOME='/root/nutch_source/runtime/local'
 
 # Checkout and build the Nutch master branch (1.x)
diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index 9b38d2fa9..06e269bf5 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -116,15 +116,17 @@
                        <exclude org="org.gnu.inet" module="libidn" /><!-- LGPL 
2.1 -->
                </dependency>
 
-        <dependency org="org.hamcrest" name="hamcrest" rev="3.0" 
conf="test->default"/>
-        <!-- Required for <junitlauncher> task -->
-        <dependency org="org.junit.platform" name="junit-platform-launcher" 
rev="1.14.1" conf="test->default"/>
-        <!-- Required for JUnit 5 (Jupiter) test execution -->
-        <dependency org="org.junit.jupiter" name="junit-jupiter-engine" 
rev="5.14.1" conf="test->default"/>
-        <dependency org="org.junit.jupiter" name="junit-jupiter-api" 
rev="5.14.1" conf="test->default"/>
-        <!-- Mockito for mocking in tests -->
-        <dependency org="org.mockito" name="mockito-core" rev="5.18.0" 
conf="test->default"/>
-        <dependency org="org.mockito" name="mockito-junit-jupiter" 
rev="5.18.0" conf="test->default"/>
+    <dependency org="org.hamcrest" name="hamcrest" rev="3.0" 
conf="test->default"/>
+    <!-- JSpecify nullability annotations for improved null safety -->
+    <dependency org="org.jspecify" name="jspecify" rev="1.0.0" 
conf="*->default"/>
+    <!-- Required for <junitlauncher> task -->
+    <dependency org="org.junit.platform" name="junit-platform-launcher" 
rev="6.0.3" conf="test->default"/>
+    <!-- Required for JUnit 6 (Jupiter) test execution -->
+    <dependency org="org.junit.jupiter" name="junit-jupiter-engine" 
rev="6.0.3" conf="test->default"/>
+    <dependency org="org.junit.jupiter" name="junit-jupiter-api" rev="6.0.3" 
conf="test->default"/>
+    <!-- Mockito for mocking in tests -->
+    <dependency org="org.mockito" name="mockito-core" rev="5.18.0" 
conf="test->default"/>
+    <dependency org="org.mockito" name="mockito-junit-jupiter" rev="5.18.0" 
conf="test->default"/>
 
                <!-- Jetty used to serve test pages for unit tests, but is also 
provided as dependency of Hadoop -->
                <dependency org="org.eclipse.jetty" name="jetty-server" 
rev="12.1.5" conf="test->default">
diff --git a/src/plugin/build-plugin.xml b/src/plugin/build-plugin.xml
index b0aca7103..f1787ed03 100755
--- a/src/plugin/build-plugin.xml
+++ b/src/plugin/build-plugin.xml
@@ -218,6 +218,7 @@
           <jvmarg value="-Xmx1000m"/>
           <sysproperty key="test.data" value="${build.test}/data"/>
           <sysproperty key="test.input" value="${root}/data"/>
+          <sysproperty key="junit.platform.execution.failfast.enabled" 
value="${test.failfast}"/>
           <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" 
value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
         </fork>
         <fileset dir="${build.test}">
@@ -232,6 +233,7 @@
           <jvmarg value="-Xmx1000m"/>
           <sysproperty key="test.data" value="${build.test}/data"/>
           <sysproperty key="test.input" value="${root}/data"/>
+          <sysproperty key="junit.platform.execution.failfast.enabled" 
value="${test.failfast}"/>
           <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" 
value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
         </fork>
         <fileset dir="${basedir}">
diff --git a/src/test/junit-platform.properties 
b/src/test/junit-platform.properties
new file mode 100644
index 000000000..b2f5ccaea
--- /dev/null
+++ b/src/test/junit-platform.properties
@@ -0,0 +1,31 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# JUnit Platform Configuration
+# See: https://junit.org/junit5/docs/current/user-guide/#running-tests-config-params
+
+# Fail-fast mode - when true, stops test execution on first failure
+# (faster feedback during development). Disabled by default so that all
+# tests run even if some fail; enable with: ant test -Dtest.failfast=true
+junit.platform.execution.failfast.enabled=false
+
+# Display names for tests - use method names by default
+junit.jupiter.displayname.generator.default=org.junit.jupiter.api.DisplayNameGenerator$Standard
+
+# Timeout configuration for individual tests (can be overridden with @Timeout)
+# junit.jupiter.execution.timeout.default=5m
+
+# Parallel execution configuration (disabled by default for deterministic results)
+junit.jupiter.execution.parallel.enabled=false
diff --git a/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java 
b/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java
index 9e96071a0..581b528f3 100644
--- a/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java
+++ b/src/test/org/apache/nutch/crawl/CrawlDBTestUtil.java
@@ -24,6 +24,8 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
+import org.jspecify.annotations.NonNull;
+import org.jspecify.annotations.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -53,6 +55,10 @@ import org.eclipse.jetty.server.ServerConnector;
 import org.eclipse.jetty.server.handler.ContextHandler;
 import org.eclipse.jetty.server.handler.ResourceHandler;
 
+/**
+ * Test utility for creating and manipulating CrawlDb instances.
+ * Uses JSpecify annotations for null safety.
+ */
 public class CrawlDBTestUtil {
 
   private static final Logger LOG = LoggerFactory
@@ -62,6 +68,8 @@ public class CrawlDBTestUtil {
   /**
    * Creates synthetic crawldb
    * 
+   * @param conf
+   *          configuration to use
    * @param fs
    *          filesystem where db will be created
    * @param crawldb
@@ -70,8 +78,8 @@ public class CrawlDBTestUtil {
    *          urls to be inserted, objects are of type URLCrawlDatum
    * @throws Exception
    */
-  public static void createCrawlDb(Configuration conf, FileSystem fs,
-      Path crawldb, List<URLCrawlDatum> init) throws Exception {
+  public static void createCrawlDb(@NonNull Configuration conf, @NonNull 
FileSystem fs,
+      @NonNull Path crawldb, @NonNull List<URLCrawlDatum> init) throws 
Exception {
     LOG.trace("* creating crawldb: {}", crawldb);
     Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
     Option wKeyOpt = MapFile.Writer.keyClass(Text.class);
@@ -366,8 +374,9 @@ public class CrawlDBTestUtil {
    * override the default one and it is currently not possible to use
    * dynamically set values.
    * 
-   * @return
+   * @return a new Reducer Context with test configuration
    */
+  @NonNull
   public static Reducer<Text, CrawlDatum, Text, CrawlDatum>.Context 
createContext() {
     DummyContext context = new DummyContext();
     Configuration conf = context.getConfiguration();
@@ -376,13 +385,16 @@ public class CrawlDBTestUtil {
     return (Reducer<Text, CrawlDatum, Text, CrawlDatum>.Context) context;
   }
 
+  /** Container for URL and CrawlDatum pairs used in test data. */
   public static class URLCrawlDatum {
 
+    @NonNull
     public Text url;
 
+    @NonNull
     public CrawlDatum datum;
 
-    public URLCrawlDatum(Text url, CrawlDatum datum) {
+    public URLCrawlDatum(@NonNull Text url, @NonNull CrawlDatum datum) {
       this.url = url;
       this.datum = datum;
     }
@@ -391,20 +403,27 @@ public class CrawlDBTestUtil {
   /**
    * Generate seedlist
    * 
+   * @param fs filesystem to use
+   * @param urlPath path where seed file will be created
+   * @param urls list of URLs to write
    * @throws IOException
    */
-  public static void generateSeedList(FileSystem fs, Path urlPath,
-      List<String> urls) throws IOException {
+  public static void generateSeedList(@NonNull FileSystem fs, @NonNull Path 
urlPath,
+      @NonNull List<String> urls) throws IOException {
     generateSeedList(fs, urlPath, urls, new ArrayList<String>());
   }
 
   /**
-   * Generate seedlist
+   * Generate seedlist with optional metadata
    * 
+   * @param fs filesystem to use
+   * @param urlPath path where seed file will be created
+   * @param urls list of URLs to write
+   * @param metadata optional metadata for each URL
    * @throws IOException
    */
-  public static void generateSeedList(FileSystem fs, Path urlPath,
-      List<String> urls, List<String> metadata) throws IOException {
+  public static void generateSeedList(@NonNull FileSystem fs, @NonNull Path 
urlPath,
+      @NonNull List<String> urls, @NonNull List<String> metadata) throws 
IOException {
     FSDataOutputStream out;
     Path file = new Path(urlPath, "urls.txt");
     fs.mkdirs(urlPath);
@@ -439,9 +458,11 @@ public class CrawlDBTestUtil {
    *          port to listen to
    * @param staticContent
    *          folder where static content lives
+   * @return configured Jetty server instance
    * @throws UnknownHostException
    */
-  public static Server getServer(int port, String staticContent)
+  @NonNull
+  public static Server getServer(int port, @NonNull String staticContent)
       throws UnknownHostException {
     Server webServer = new Server();
 
diff --git a/src/test/org/apache/nutch/fetcher/TestFetcher.java 
b/src/test/org/apache/nutch/fetcher/TestFetcher.java
index f25cab545..176a88a52 100644
--- a/src/test/org/apache/nutch/fetcher/TestFetcher.java
+++ b/src/test/org/apache/nutch/fetcher/TestFetcher.java
@@ -28,14 +28,18 @@ import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.metadata.Nutch;
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.protocol.Content;
+import org.apache.nutch.util.CancellationAwareTestUtils;
+import org.apache.nutch.util.CancellationAwareTestUtils.CancellationToken;
 import org.eclipse.jetty.server.Server;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.concurrent.TimeUnit;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -44,6 +48,8 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
  * Basic fetcher test 1. generate seedlist 2. inject 3. generate 3. fetch 4.
  * Verify contents
  * 
+ * <p>This test is cancellation-aware and will exit gracefully if the test
+ * suite is stopped early (e.g., due to fail-fast mode).</p>
  */
 public class TestFetcher {
 
@@ -81,7 +87,10 @@ public class TestFetcher {
   }
 
   @Test
+  @Timeout(value = 5, unit = TimeUnit.MINUTES)
   public void testFetch() throws IOException, ClassNotFoundException, 
InterruptedException {
+    // Create cancellation token for graceful shutdown support
+    CancellationToken cancellationToken = 
CancellationAwareTestUtils.createToken();
 
     // generate seedlist
     ArrayList<String> urls = new ArrayList<String>();
@@ -95,15 +104,22 @@ public class TestFetcher {
 
     CrawlDBTestUtil.generateSeedList(fs, urlPath, urls);
 
+    // Check for cancellation before long-running operations
+    cancellationToken.throwIfCancelled();
+
     // inject
     Injector injector = new Injector(conf);
     injector.inject(crawldbPath, urlPath);
 
+    cancellationToken.throwIfCancelled();
+
     // generate
     Generator g = new Generator(conf);
     Path[] generatedSegment = g.generate(crawldbPath, segmentsPath, 1,
         Long.MAX_VALUE, Long.MAX_VALUE, false, false, false, 1, null);
 
+    cancellationToken.throwIfCancelled();
+
     long time = System.currentTimeMillis();
     // fetch
     Fetcher fetcher = new Fetcher(conf);
@@ -115,6 +131,11 @@ public class TestFetcher {
 
     time = System.currentTimeMillis() - time;
 
+    // Skip verification if cancelled
+    if (cancellationToken.isCancelled()) {
+      return;
+    }
+
     // verify politeness, time taken should be more than (num_of_pages +1)*delay
     int minimumTime = (int) ((urls.size() + 1) * 1000 * conf.getFloat(
         "fetcher.server.delay", 5));
@@ -127,18 +148,28 @@ public class TestFetcher {
 
     ArrayList<String> handledurls = new ArrayList<String>();
 
-    READ_CONTENT: do {
-      Text key = new Text();
-      Content value = new Content();
-      if (!reader.next(key, value))
-        break READ_CONTENT;
-      String contentString = new String(value.getContent());
-      if (contentString.indexOf("Nutch fetcher test page") != -1) {
-        handledurls.add(key.toString());
-      }
-    } while (true);
+    try {
+      READ_CONTENT: do {
+        // Check for cancellation periodically during I/O operations
+        if (cancellationToken.isCancelled()) break READ_CONTENT;
+        
+        Text key = new Text();
+        Content value = new Content();
+        if (!reader.next(key, value))
+          break READ_CONTENT;
+        String contentString = new String(value.getContent());
+        if (contentString.indexOf("Nutch fetcher test page") != -1) {
+          handledurls.add(key.toString());
+        }
+      } while (true);
+    } finally {
+      reader.close();
+    }
 
-    reader.close();
+    // Skip remaining verification if cancelled
+    if (cancellationToken.isCancelled()) {
+      return;
+    }
 
     Collections.sort(urls);
     Collections.sort(handledurls);
@@ -157,22 +188,32 @@ public class TestFetcher {
         new Path(generatedSegment[0], ParseData.DIR_NAME), 
"part-r-00000/data");
     reader = new SequenceFile.Reader(conf, 
SequenceFile.Reader.file(parseData));
 
-    READ_PARSE_DATA: do {
-      Text key = new Text();
-      ParseData value = new ParseData();
-      if (!reader.next(key, value))
-        break READ_PARSE_DATA;
-      // make sure they all contain "nutch.segment.name" and
-      // "nutch.content.digest"
-      // keys in parse metadata
-      Metadata contentMeta = value.getContentMeta();
-      if (contentMeta.get(Nutch.SEGMENT_NAME_KEY) != null
-          && contentMeta.get(Nutch.SIGNATURE_KEY) != null) {
-        handledurls.add(key.toString());
-      }
-    } while (true);
+    try {
+      READ_PARSE_DATA: do {
+        // Check for cancellation periodically
+        if (cancellationToken.isCancelled()) break READ_PARSE_DATA;
+        
+        Text key = new Text();
+        ParseData value = new ParseData();
+        if (!reader.next(key, value))
+          break READ_PARSE_DATA;
+        // make sure they all contain "nutch.segment.name" and
+        // "nutch.content.digest"
+        // keys in parse metadata
+        Metadata contentMeta = value.getContentMeta();
+        if (contentMeta.get(Nutch.SEGMENT_NAME_KEY) != null
+            && contentMeta.get(Nutch.SIGNATURE_KEY) != null) {
+          handledurls.add(key.toString());
+        }
+      } while (true);
+    } finally {
+      reader.close();
+    }
 
-    reader.close();
+    // Skip final assertions if cancelled
+    if (cancellationToken.isCancelled()) {
+      return;
+    }
 
     Collections.sort(handledurls);
 
diff --git a/src/test/org/apache/nutch/segment/TestSegmentMerger.java 
b/src/test/org/apache/nutch/segment/TestSegmentMerger.java
index 0df88a2de..9cc076ad6 100644
--- a/src/test/org/apache/nutch/segment/TestSegmentMerger.java
+++ b/src/test/org/apache/nutch/segment/TestSegmentMerger.java
@@ -17,6 +17,7 @@
 package org.apache.nutch.segment;
 
 import java.text.DecimalFormat;
+import java.util.concurrent.TimeUnit;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -28,16 +29,23 @@ import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
 import org.apache.nutch.parse.ParseText;
+import org.apache.nutch.util.CancellationAwareTestUtils;
+import org.apache.nutch.util.CancellationAwareTestUtils.CancellationToken;
 import org.apache.nutch.util.NutchConfiguration;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
 import org.junit.jupiter.api.BeforeEach;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assumptions.assumeTrue;
 
+/**
+ * Tests for SegmentMerger functionality.
+ * This test is cancellation-aware for graceful shutdown during fail-fast mode.
+ */
 public class TestSegmentMerger {
   Configuration conf;
   FileSystem fs;
@@ -106,9 +114,19 @@ public class TestSegmentMerger {
   }
 
   @Test
+  @Timeout(value = 10, unit = TimeUnit.MINUTES)
   public void testLargeMerge() throws Exception {
+    // Create cancellation token for graceful shutdown support
+    CancellationToken cancellationToken = 
CancellationAwareTestUtils.createToken();
+
     SegmentMerger merger = new SegmentMerger(conf);
     merger.merge(out, new Path[] { seg1, seg2 }, false, false, -1);
+
+    // Check for cancellation before verification
+    if (cancellationToken.isCancelled()) {
+      return;
+    }
+
     // verify output
     FileStatus[] stats = fs.listStatus(out);
     // there should be just one path
@@ -119,20 +137,37 @@ public class TestSegmentMerger {
     MapFile.Reader[] readers = MapFileOutputFormat.getReaders(new Path(
         outSeg, ParseText.DIR_NAME), conf);
     int cnt1 = 0, cnt2 = 0;
-    for (MapFile.Reader r : readers) {
-      while (r.next(k, v)) {
-        String ks = k.toString();
-        String vs = v.getText();
-        if (ks.startsWith("seg1-")) {
-          cnt1++;
-          assertTrue(vs.startsWith("seg1 "));
-        } else if (ks.startsWith("seg2-")) {
-          cnt2++;
-          assertTrue(vs.startsWith("seg2 "));
+    try {
+      for (MapFile.Reader r : readers) {
+        while (r.next(k, v)) {
+          // Check for cancellation periodically during I/O
+          if (cancellationToken.isCancelled()) {
+            return;
+          }
+          
+          String ks = k.toString();
+          String vs = v.getText();
+          if (ks.startsWith("seg1-")) {
+            cnt1++;
+            assertTrue(vs.startsWith("seg1 "));
+          } else if (ks.startsWith("seg2-")) {
+            cnt2++;
+            assertTrue(vs.startsWith("seg2 "));
+          }
         }
       }
-      r.close();
+    } finally {
+      // Ensure readers are closed even on cancellation
+      for (MapFile.Reader r : readers) {
+        r.close();
+      }
+    }
+
+    // Skip final assertions if cancelled
+    if (cancellationToken.isCancelled()) {
+      return;
     }
+
     assertEquals(countSeg1, cnt1);
     assertEquals(countSeg2, cnt2);
   }
diff --git a/src/test/org/apache/nutch/util/CancellationAwareTestUtils.java 
b/src/test/org/apache/nutch/util/CancellationAwareTestUtils.java
new file mode 100644
index 000000000..055a64261
--- /dev/null
+++ b/src/test/org/apache/nutch/util/CancellationAwareTestUtils.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.util;
+
+import org.jspecify.annotations.NonNull;
+import org.jspecify.annotations.Nullable;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.BooleanSupplier;
+
+/**
+ * Utility class for making long-running tests cancellation-aware.
+ *
+ * <p>This supports JUnit 6's fail-fast mode by allowing tests to check
+ * for cancellation requests and exit gracefully, ensuring proper resource
+ * cleanup even when the test suite is stopped early.</p>
+ *
+ * <p>Usage example:</p>
+ * <pre>{@code
+ * @Test
+ * @Timeout(value = 5, unit = TimeUnit.MINUTES)
+ * void testLongRunningOperation() throws Exception {
+ *     CancellationAwareTestUtils.CancellationToken token =
+ *         CancellationAwareTestUtils.createToken();
+ *
+ *     try {
+ *         while (hasMoreWork() && !token.isCancelled()) {
+ *             doWork();
+ *         }
+ *     } finally {
+ *         cleanup();
+ *     }
+ * }
+ * }</pre>
+ */
+public final class CancellationAwareTestUtils {
+
+    /** Static helpers only; this class is never instantiated. */
+    private CancellationAwareTestUtils() {}
+
+    /**
+     * A simple cancellation token that can be polled during long-running
+     * operations. Once {@link #isCancelled()} observes a request (explicit,
+     * thread interrupt, or the optional extra check) it stays cancelled.
+     */
+    public static class CancellationToken {
+        private final AtomicBoolean cancelled = new AtomicBoolean(false);
+        @Nullable
+        private final BooleanSupplier additionalCheck;
+
+        CancellationToken(@Nullable BooleanSupplier additionalCheck) {
+            this.additionalCheck = additionalCheck;
+        }
+
+        /**
+         * Check if cancellation has been requested: explicitly, by interrupt
+         * of the calling thread (flag left set), or by the additional check.
+         *
+         * @return true if the operation should be cancelled
+         */
+        public boolean isCancelled() {
+            if (cancelled.get()) {
+                return true;
+            }
+            boolean requested = Thread.currentThread().isInterrupted()
+                || (additionalCheck != null && additionalCheck.getAsBoolean());
+            if (requested) {
+                cancelled.set(true); // latch: later calls stay cancelled
+            }
+            return requested;
+        }
+
+        /** Explicitly request cancellation. */
+        public void cancel() {
+            cancelled.set(true);
+        }
+
+        /**
+         * Throws InterruptedException if cancellation has been requested.
+         * Useful for cooperative cancellation in loops.
+         *
+         * @throws InterruptedException if cancelled
+         */
+        public void throwIfCancelled() throws InterruptedException {
+            if (isCancelled()) {
+                throw new InterruptedException("Test cancelled");
+            }
+        }
+    }
+
+    /**
+     * Creates a new cancellation token.
+     *
+     * @return a new CancellationToken instance
+     */
+    @NonNull
+    public static CancellationToken createToken() {
+        return new CancellationToken(null);
+    }
+
+    /**
+     * Creates a cancellation token with an additional cancellation condition.
+     *
+     * @param additionalCheck additional condition that triggers cancellation
+     * @return a new CancellationToken instance
+     */
+    @NonNull
+    public static CancellationToken createToken(
+            @NonNull BooleanSupplier additionalCheck) {
+        return new CancellationToken(additionalCheck);
+    }
+
+    /**
+     * Executes an operation repeatedly with periodic cancellation checks.
+     *
+     * @param token the cancellation token to check
+     * @param operation the operation to execute (should be short-lived)
+     * @param iterations number of times to execute the operation
+     * @param checkInterval check for cancellation every N iterations; positive
+     * @return the number of iterations actually completed
+     * @throws InterruptedException if cancelled during execution
+     * @throws IllegalArgumentException if checkInterval is zero or negative
+     */
+    public static int executeWithCancellation(@NonNull CancellationToken token,
+            @NonNull Runnable operation, int iterations, int checkInterval)
+            throws InterruptedException {
+        if (checkInterval <= 0) {
+            throw new IllegalArgumentException(
+                "checkInterval must be positive: " + checkInterval);
+        }
+        int completed = 0;
+        while (completed < iterations) {
+            if (completed % checkInterval == 0) {
+                token.throwIfCancelled();
+            }
+            operation.run();
+            completed++;
+        }
+        return completed;
+    }
+
+    /**
+     * Sleeps for the specified duration while remaining cancellation-aware.
+     * Checks for cancellation every 100ms.
+     *
+     * @param token the cancellation token
+     * @param millis total milliseconds to sleep
+     * @throws InterruptedException if cancelled or interrupted
+     */
+    public static void sleepWithCancellation(
+            @NonNull CancellationToken token, long millis)
+            throws InterruptedException {
+        for (long remaining = millis; remaining > 0; remaining -= 100) {
+            token.throwIfCancelled();
+            Thread.sleep(Math.min(remaining, 100));
+        }
+    }
+
+    /**
+     * A unit of work that polls the supplied token to honor cancellation.
+     *
+     * @param <T> the result type
+     */
+    @FunctionalInterface
+    public interface CancellableOperation<T> {
+        /**
+         * Execute the operation.
+         *
+         * @param token cancellation token to check
+         * @return the result; may be null
+         * @throws Exception if the operation fails
+         */
+        @Nullable
+        T execute(@NonNull CancellationToken token) throws Exception;
+    }
+
+    /**
+     * Runs an operation with a fresh token; returns null if it was cancelled.
+     *
+     * @param <T> the result type
+     * @param operation the operation to run
+     * @return the result, or null if cancelled (InterruptedException thrown)
+     * @throws Exception if the operation fails (not due to cancellation)
+     */
+    @Nullable
+    public static <T> T runCancellable(
+            @NonNull CancellableOperation<T> operation) throws Exception {
+        try {
+            return operation.execute(createToken());
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            return null;
+        }
+    }
+}
diff --git a/src/test/org/apache/nutch/util/WritableTestUtils.java b/src/test/org/apache/nutch/util/WritableTestUtils.java
index d4429dbf3..632849e71 100644
--- a/src/test/org/apache/nutch/util/WritableTestUtils.java
+++ b/src/test/org/apache/nutch/util/WritableTestUtils.java
@@ -18,24 +18,31 @@ package org.apache.nutch.util;
 
 import org.apache.hadoop.io.*;
 import org.apache.hadoop.conf.*;
+import org.jspecify.annotations.NonNull;
+import org.jspecify.annotations.Nullable;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
+/**
+ * Utility methods for testing Hadoop Writable implementations.
+ * Uses JSpecify annotations for null safety.
+ */
 public class WritableTestUtils {
 
   /** Utility method for testing writables. */
-  public static void testWritable(Writable before) throws Exception {
+  public static void testWritable(@NonNull Writable before) throws Exception {
     testWritable(before, null);
   }
 
   /** Utility method for testing writables. */
-  public static void testWritable(Writable before, Configuration conf)
+  public static void testWritable(@NonNull Writable before, @Nullable Configuration conf)
       throws Exception {
     assertEquals(before, writeRead(before, conf));
   }
 
   /** Utility method for testing writables. */
-  public static Writable writeRead(Writable before, Configuration conf)
+  @NonNull
+  public static Writable writeRead(@NonNull Writable before, @Nullable Configuration conf)
       throws Exception {
 
     DataOutputBuffer dob = new DataOutputBuffer();


Reply via email to