This is an automated email from the ASF dual-hosted git repository.

lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new 64ac8b463 NUTCH-3154 Implement integration testing framework for Nutch 
IndexWriter plugins using Testcontainers (#895)
64ac8b463 is described below

commit 64ac8b46347e6d836fc7c8ef693065742bf519e6
Author: Lewis John McGibbney <[email protected]>
AuthorDate: Sat Feb 21 08:42:16 2026 -0800

    NUTCH-3154 Implement integration testing framework for Nutch IndexWriter 
plugins using Testcontainers (#895)
---
 .github/workflows/master-build.yml                 |   6 +
 build.xml                                          |   4 +
 ivy/ivy.xml                                        |   4 +
 src/plugin/build-plugin.xml                        |  31 ++++-
 src/plugin/build.xml                               |  11 ++
 src/plugin/indexer-elastic/ivy.xml                 |   5 +-
 .../indexwriter/elastic/ElasticIndexWriterIT.java  | 113 +++++++++++++++++
 src/plugin/indexer-kafka/ivy.xml                   |   1 +
 .../indexwriter/kafka/KafkaIndexWriterIT.java      |  96 +++++++++++++++
 src/plugin/indexer-rabbit/ivy.xml                  |   6 +-
 .../indexwriter/rabbit/RabbitIndexWriterIT.java    |  90 ++++++++++++++
 src/plugin/indexer-solr/ivy.xml                    |   1 +
 .../nutch/indexwriter/solr/SolrIndexWriterIT.java  | 137 +++++++++++++++++++++
 .../nutch/indexer/AbstractIndexWriterIT.java       |  90 ++++++++++++++
 .../nutch/indexer/IndexWriterIntegrationTest.java  |  53 ++++++++
 15 files changed, 644 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/master-build.yml 
b/.github/workflows/master-build.yml
index 1fe9da252..2056c1e10 100644
--- a/.github/workflows/master-build.yml
+++ b/.github/workflows/master-build.yml
@@ -204,6 +204,8 @@ jobs:
               - 'src/testresources/**'
             plugins:
               - 'src/plugin/**'
+            indexer_plugins:
+              - 'src/plugin/indexer-*/**'
             buildconf:
               - 'build.xml'
               - 'ivy/ivy.xml'
@@ -220,6 +222,10 @@ jobs:
       - name: test plugins
         if: ${{ steps.filter.outputs.plugins == 'true' && 
steps.filter.outputs.core == 'false' && steps.filter.outputs.buildconf == 
'false' }}
         run: ant clean test-plugins -buildfile build.xml
+      # run indexer integration tests when indexer plugin files change (Docker 
required, ubuntu-latest only)
+      - name: test indexer integration
+        if: ${{ steps.filter.outputs.indexer_plugins == 'true' && matrix.os == 
'ubuntu-latest' }}
+        run: ant clean test-indexer-integration -buildfile build.xml
       - name: Check for test results
         id: check_tests
         if: always() && matrix.os == 'ubuntu-latest'
diff --git a/build.xml b/build.xml
index 277225d24..57d44ee49 100644
--- a/build.xml
+++ b/build.xml
@@ -545,6 +545,10 @@
     <ant dir="src/plugin" target="test-single" inheritAll="false"/>
   </target>
 
+  <target name="test-indexer-integration" depends="resolve-test, compile, 
compile-core-test, job" description="--> run indexer plugin integration tests 
(Testcontainers)">
+    <ant dir="src/plugin" target="test-indexer-integration" 
inheritAll="false"/>
+  </target>
+
   <target name="nightly" depends="test, tar-src, zip-src" description="--> run 
the nightly target build">
   </target>
 
diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index 06e269bf5..b6b8f67a9 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -128,6 +128,10 @@
     <dependency org="org.mockito" name="mockito-core" rev="5.18.0" 
conf="test->default"/>
     <dependency org="org.mockito" name="mockito-junit-jupiter" rev="5.18.0" 
conf="test->default"/>
 
+    <!-- Testcontainers for indexer plugin integration tests -->
+    <dependency org="org.testcontainers" name="testcontainers" rev="2.0.3" 
conf="test->default"/>
+    <dependency org="org.testcontainers" name="junit-jupiter" rev="1.21.4" 
conf="test->default"/>
+
                <!-- Jetty used to serve test pages for unit tests, but is also 
provided as dependency of Hadoop -->
                <dependency org="org.eclipse.jetty" name="jetty-server" 
rev="12.1.5" conf="test->default">
                        <exclude org="ch.qos.reload4j" module="*" />
diff --git a/src/plugin/build-plugin.xml b/src/plugin/build-plugin.xml
index f1787ed03..ef8dda56c 100755
--- a/src/plugin/build-plugin.xml
+++ b/src/plugin/build-plugin.xml
@@ -189,7 +189,7 @@
   <!-- ================================================================== -->
   <!-- Compile test code                                                  --> 
   <!-- ================================================================== -->
-  <target name="compile-test" depends="compile, deps-test-compile" 
if="test.available">
+  <target name="compile-test" depends="resolve-test, compile, 
deps-test-compile" if="test.available">
     <javac 
      encoding="${build.encoding}" 
      srcdir="${src.test}"
@@ -242,7 +242,34 @@
       </testclasses>
     </junitlauncher>
     <fail if="tests.failed">Tests failed!</fail>
-  </target>   
+  </target>
+
+  <!-- ================================================================== -->
+  <!-- Run indexer plugin integration tests (Testcontainers)               -->
+  <!-- ================================================================== -->
+  <target name="test-indexer-integration" depends="compile-test, deploy" 
if="test.available">
+    <echo message="Running indexer integration tests for plugin: ${name}"/>
+    <junitlauncher printSummary="true" haltOnFailure="false" 
failureProperty="integration.tests.failed">
+      <classpath refid="test.classpath"/>
+      <testclasses outputDir="${build.test}">
+        <listener type="legacy-plain" sendSysOut="true" sendSysErr="true"/>
+        <listener type="legacy-xml" sendSysOut="true" sendSysErr="true"/>
+        <fork forkMode="perTestClass">
+          <jvmarg value="-Xmx2000m"/>
+          <sysproperty key="test.data" value="${build.test}/data"/>
+          <sysproperty key="test.input" value="${root}/data"/>
+          <sysproperty key="testcontainers.reuse.enable" value="true"/>
+          <sysproperty key="javax.xml.parsers.DocumentBuilderFactory" 
value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
+        </fork>
+        <fileset dir="${build.test}">
+          <include name="**/IT*.class"/>
+          <include name="**/*IT.class"/>
+          <include name="**/*IntegrationTest.class"/>
+        </fileset>
+      </testclasses>
+    </junitlauncher>
+    <fail if="integration.tests.failed">Indexer integration tests 
failed!</fail>
+  </target>
 
   <!-- target: resolve  ================================================= -->
   <target name="resolve-default" depends="clean-lib" description="resolve and 
retrieve dependencies with ivy">
diff --git a/src/plugin/build.xml b/src/plugin/build.xml
index b7a5a7721..18d00da3b 100755
--- a/src/plugin/build.xml
+++ b/src/plugin/build.xml
@@ -172,6 +172,17 @@
     <ant dir="${plugin}" target="test"/>
   </target>
 
+  <!-- ======================================================  -->
+  <!-- Indexer plugin integration tests (Testcontainers)       -->
+  <!-- Run sequentially to avoid container resource contention -->
+  <!-- ======================================================  -->
+  <target name="test-indexer-integration">
+    <ant dir="indexer-elastic" target="test-indexer-integration"/>
+    <ant dir="indexer-kafka" target="test-indexer-integration"/>
+    <ant dir="indexer-rabbit" target="test-indexer-integration"/>
+    <ant dir="indexer-solr" target="test-indexer-integration"/>
+  </target>
+
   <!-- ====================================================== -->
   <!-- Clean all of the plugins.                              -->
   <!-- ====================================================== -->
diff --git a/src/plugin/indexer-elastic/ivy.xml 
b/src/plugin/indexer-elastic/ivy.xml
index ee812a225..04c1a071d 100644
--- a/src/plugin/indexer-elastic/ivy.xml
+++ b/src/plugin/indexer-elastic/ivy.xml
@@ -36,7 +36,10 @@
   </publications>
 
   <dependencies>
-    <dependency org="org.elasticsearch.client" 
name="elasticsearch-rest-high-level-client" rev="7.10.2"/>
+    <dependency org="org.testcontainers" name="testcontainers-elasticsearch" 
rev="2.0.3" conf="test->default"/>
+    <dependency org="org.elasticsearch.client" 
name="elasticsearch-rest-high-level-client" rev="7.10.2">
+      <exclude org="org.apache.logging.log4j" name="*"/>
+    </dependency>
     <dependency org="org.apache.lucene" name="lucene-analyzers-common" 
rev="8.11.2"/>
     <dependency org="org.apache.lucene" name="lucene-backward-codecs" 
rev="8.11.2"/>
     <dependency org="org.apache.lucene" name="lucene-core" rev="8.11.2"/>
diff --git 
a/src/plugin/indexer-elastic/src/test/org/apache/nutch/indexwriter/elastic/ElasticIndexWriterIT.java
 
b/src/plugin/indexer-elastic/src/test/org/apache/nutch/indexwriter/elastic/ElasticIndexWriterIT.java
new file mode 100644
index 000000000..0479213c3
--- /dev/null
+++ 
b/src/plugin/indexer-elastic/src/test/org/apache/nutch/indexwriter/elastic/ElasticIndexWriterIT.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.elastic;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.http.HttpHost;
+import org.apache.nutch.indexer.AbstractIndexWriterIT;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexWriterParams;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.util.NutchConfiguration;
+import org.elasticsearch.action.get.GetRequest;
+import org.elasticsearch.action.get.GetResponse;
+import org.elasticsearch.client.RequestOptions;
+import org.elasticsearch.client.RestClient;
+import org.elasticsearch.client.RestHighLevelClient;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.elasticsearch.ElasticsearchContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Integration tests for ElasticIndexWriter using Testcontainers.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public class ElasticIndexWriterIT extends AbstractIndexWriterIT {
+
+  private static final String ELASTICSEARCH_IMAGE =
+      "docker.elastic.co/elasticsearch/elasticsearch:7.10.2";
+
+  @Container
+  private static final ElasticsearchContainer elasticsearchContainer =
+      new ElasticsearchContainer(ELASTICSEARCH_IMAGE)
+          .withEnv("discovery.type", "single-node")
+          .withEnv("xpack.security.enabled", "false");
+
+  private ElasticIndexWriter indexWriter;
+  private Configuration conf;
+
+  @Override
+  public void setUpIndexWriter() throws Exception {
+    conf = NutchConfiguration.create();
+    indexWriter = new ElasticIndexWriter();
+    indexWriter.setConf(conf);
+
+    Map<String, String> params = new HashMap<>();
+    params.put(ElasticConstants.HOSTS, elasticsearchContainer.getHost());
+    params.put(ElasticConstants.PORT, String.valueOf(elasticsearchContainer.getMappedPort(9200)));
+    params.put(ElasticConstants.INDEX, "test-index");
+    params.put(ElasticConstants.SCHEME, "http");
+
+    IndexWriterParams writerParams = new IndexWriterParams(params);
+    indexWriter.open(writerParams);
+  }
+
+  @Override
+  public void tearDownIndexWriter() throws Exception {
+    if (indexWriter != null) {
+      try {
+        indexWriter.close();
+      } catch (Exception e) {
+        // Ignore if open() failed and close state is invalid
+      }
+      indexWriter = null;
+    }
+  }
+
+  @Override
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  @Override
+  public boolean supportsDelete() {
+    return true;
+  }
+
+  @Override
+  public void verifyDocumentWritten(String docId, String expectedTitle) throws Exception {
+    try (RestHighLevelClient client = new RestHighLevelClient(
+        RestClient.builder(
+            new HttpHost(elasticsearchContainer.getHost(),
+                elasticsearchContainer.getMappedPort(9200),
+                "http")))) {
+      GetRequest getRequest = new GetRequest("test-index", docId);
+      GetResponse getResponse = client.get(getRequest, RequestOptions.DEFAULT);
+      assertTrue(getResponse.isExists(), "Document should exist in index");
+      assertNotNull(getResponse.getSource());
+      assertEquals(expectedTitle, getResponse.getSource().get("title"));
+    }
+  }
+}
diff --git a/src/plugin/indexer-kafka/ivy.xml b/src/plugin/indexer-kafka/ivy.xml
index d6157d953..ffba6746d 100644
--- a/src/plugin/indexer-kafka/ivy.xml
+++ b/src/plugin/indexer-kafka/ivy.xml
@@ -37,6 +37,7 @@
   </publications>
 
   <dependencies>
+      <dependency org="org.testcontainers" name="testcontainers-kafka" 
rev="2.0.3" conf="test->default"/>
       <dependency org="org.apache.kafka" name="kafka_2.12" rev="3.7.0"/>
       <dependency org="org.apache.kafka" name="connect-json" rev="3.7.0"/>
   </dependencies>
diff --git 
a/src/plugin/indexer-kafka/src/test/org/apache/nutch/indexwriter/kafka/KafkaIndexWriterIT.java
 
b/src/plugin/indexer-kafka/src/test/org/apache/nutch/indexwriter/kafka/KafkaIndexWriterIT.java
new file mode 100644
index 000000000..4f6a306d4
--- /dev/null
+++ 
b/src/plugin/indexer-kafka/src/test/org/apache/nutch/indexwriter/kafka/KafkaIndexWriterIT.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.kafka;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.indexer.AbstractIndexWriterIT;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexWriterParams;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.util.NutchConfiguration;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.kafka.KafkaContainer;
+
+/**
+ * Integration tests for KafkaIndexWriter using Testcontainers.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public class KafkaIndexWriterIT extends AbstractIndexWriterIT {
+
+  private static final String KAFKA_IMAGE = "apache/kafka-native:3.8.0";
+  private static final String TEST_TOPIC = "nutch-indexer-test";
+
+  @Container
+  private static final KafkaContainer kafkaContainer =
+      new KafkaContainer(KAFKA_IMAGE);
+
+  private KafkaIndexWriter indexWriter;
+  private Configuration conf;
+
+  @Override
+  public void setUpIndexWriter() throws Exception {
+    conf = NutchConfiguration.create();
+    indexWriter = new KafkaIndexWriter();
+    indexWriter.setConf(conf);
+
+    String bootstrapServers = kafkaContainer.getBootstrapServers();
+    String hostPort = bootstrapServers.contains("://")
+        ? bootstrapServers.substring(bootstrapServers.indexOf("://") + 3)
+        : bootstrapServers;
+    String[] parts = hostPort.split(":");
+    String host = parts[0];
+    int port = Integer.parseInt(parts[1]);
+
+    Map<String, String> params = new HashMap<>();
+    params.put(KafkaConstants.HOST, host);
+    params.put(KafkaConstants.PORT, String.valueOf(port));
+    params.put(KafkaConstants.TOPIC, TEST_TOPIC);
+    params.put(KafkaConstants.VALUE_SERIALIZER,
+        "org.apache.kafka.connect.json.JsonSerializer");
+    params.put(KafkaConstants.KEY_SERIALIZER,
+        "org.apache.kafka.common.serialization.StringSerializer");
+
+    IndexWriterParams writerParams = new IndexWriterParams(params);
+    indexWriter.open(writerParams);
+  }
+
+  @Override
+  public void tearDownIndexWriter() throws Exception {
+    if (indexWriter != null) {
+      try {
+        indexWriter.close();
+      } catch (Exception e) {
+        // Ignore if open() failed and close state is invalid
+      }
+      indexWriter = null;
+    }
+  }
+
+  @Override
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  @Override
+  public boolean supportsDelete() {
+    return false;
+  }
+}
diff --git a/src/plugin/indexer-rabbit/ivy.xml 
b/src/plugin/indexer-rabbit/ivy.xml
index 81822a0fb..54930331c 100644
--- a/src/plugin/indexer-rabbit/ivy.xml
+++ b/src/plugin/indexer-rabbit/ivy.xml
@@ -35,5 +35,9 @@
     <!--get the artifact from our module name-->
     <artifact conf="master"/>
   </publications>
-  
+
+  <dependencies>
+    <dependency org="org.testcontainers" name="testcontainers-rabbitmq" 
rev="2.0.3" conf="test->default"/>
+  </dependencies>
+
 </ivy-module>
diff --git 
a/src/plugin/indexer-rabbit/src/test/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriterIT.java
 
b/src/plugin/indexer-rabbit/src/test/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriterIT.java
new file mode 100644
index 000000000..ed7d05535
--- /dev/null
+++ 
b/src/plugin/indexer-rabbit/src/test/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriterIT.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.rabbit;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.indexer.AbstractIndexWriterIT;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexWriterParams;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.util.NutchConfiguration;
+import org.testcontainers.containers.RabbitMQContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+/**
+ * Integration tests for RabbitIndexWriter using Testcontainers.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public class RabbitIndexWriterIT extends AbstractIndexWriterIT {
+
+  private static final String RABBITMQ_IMAGE = "rabbitmq:3.13-management";
+
+  @Container
+  private static final RabbitMQContainer rabbitContainer =
+      new RabbitMQContainer(RABBITMQ_IMAGE);
+
+  private RabbitIndexWriter indexWriter;
+  private Configuration conf;
+
+  @Override
+  public void setUpIndexWriter() throws Exception {
+    conf = NutchConfiguration.create();
+    indexWriter = new RabbitIndexWriter();
+    indexWriter.setConf(conf);
+
+    Map<String, String> params = new HashMap<>();
+    params.put(RabbitMQConstants.SERVER_URI, rabbitContainer.getAmqpUrl());
+    params.put(RabbitMQConstants.EXCHANGE_NAME, "nutch-indexer-test");
+    params.put(RabbitMQConstants.ROUTING_KEY, "indexer");
+    params.put(RabbitMQConstants.COMMIT_MODE, "single");
+    params.put(RabbitMQConstants.COMMIT_SIZE, "10");
+    params.put(RabbitMQConstants.BINDING, "true");
+    params.put(RabbitMQConstants.QUEUE_NAME, "nutch-indexer-queue");
+    params.put(RabbitMQConstants.EXCHANGE_OPTIONS, "type=direct,durable=true");
+    params.put(RabbitMQConstants.QUEUE_OPTIONS,
+        "durable=true,exclusive=false,auto-delete=false");
+
+    IndexWriterParams writerParams = new IndexWriterParams(params);
+    indexWriter.open(writerParams);
+  }
+
+  @Override
+  public void tearDownIndexWriter() throws Exception {
+    if (indexWriter != null) {
+      try {
+        indexWriter.close();
+      } catch (Exception e) {
+        // Ignore if open() failed and close state is invalid
+      }
+      indexWriter = null;
+    }
+  }
+
+  @Override
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  @Override
+  public boolean supportsDelete() {
+    return true;
+  }
+}
diff --git a/src/plugin/indexer-solr/ivy.xml b/src/plugin/indexer-solr/ivy.xml
index 99a713c18..4d2120955 100644
--- a/src/plugin/indexer-solr/ivy.xml
+++ b/src/plugin/indexer-solr/ivy.xml
@@ -38,6 +38,7 @@
        </publications>
 
        <dependencies>
+               <dependency org="org.testcontainers" name="testcontainers-solr" 
rev="2.0.3" conf="test->default"/>
                <dependency org="org.apache.solr" name="solr-solrj"
                                  rev="8.11.4" conf="*->default">
                        <!-- exclusions of dependencies provided by Nutch core 
-->
diff --git 
a/src/plugin/indexer-solr/src/test/org/apache/nutch/indexwriter/solr/SolrIndexWriterIT.java
 
b/src/plugin/indexer-solr/src/test/org/apache/nutch/indexwriter/solr/SolrIndexWriterIT.java
new file mode 100644
index 000000000..dcd88bdac
--- /dev/null
+++ 
b/src/plugin/indexer-solr/src/test/org/apache/nutch/indexwriter/solr/SolrIndexWriterIT.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.solr;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.indexer.IndexerMapReduce;
+import org.apache.nutch.indexer.AbstractIndexWriterIT;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexWriterParams;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.impl.Http2SolrClient;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.solr.SolrContainer;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Integration tests for SolrIndexWriter using Testcontainers.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public class SolrIndexWriterIT extends AbstractIndexWriterIT {
+
+  private static final String SOLR_IMAGE = "solr:8.11.2";
+  private static final String COLLECTION = "nutch-test";
+
+  @Container
+  private static final SolrContainer solrContainer =
+      new SolrContainer(SOLR_IMAGE).withCollection(COLLECTION);
+
+  private SolrIndexWriter indexWriter;
+  private Configuration conf;
+
+  @Override
+  public void setUpIndexWriter() throws Exception {
+    conf = NutchConfiguration.create();
+    conf.setBoolean(IndexerMapReduce.INDEXER_DELETE, false);
+
+    indexWriter = new SolrIndexWriter();
+    indexWriter.setConf(conf);
+
+    String solrUrl = "http://" + solrContainer.getHost() + ":"
+        + solrContainer.getSolrPort() + "/solr/" + COLLECTION;
+
+    Map<String, String> params = new HashMap<>();
+    params.put(SolrConstants.SERVER_TYPE, "http");
+    params.put(SolrConstants.SERVER_URLS, solrUrl);
+    params.put(SolrConstants.COLLECTION, COLLECTION);
+    params.put(SolrConstants.COMMIT_SIZE, "100");
+
+    IndexWriterParams writerParams = new IndexWriterParams(params);
+    indexWriter.open(writerParams);
+  }
+
+  @Override
+  public void tearDownIndexWriter() throws Exception {
+    if (indexWriter != null) {
+      try {
+        indexWriter.close();
+      } catch (Exception e) {
+        // Ignore if open() failed and close state is invalid
+      }
+      indexWriter = null;
+    }
+  }
+
+  @Override
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  @Override
+  public boolean supportsDelete() {
+    return true;
+  }
+
+  @Override
+  public void verifyDocumentWritten(String docId, String expectedTitle) throws Exception {
+    try (SolrClient client = new Http2SolrClient.Builder(
+        "http://" + solrContainer.getHost() + ":"
+            + solrContainer.getSolrPort() + "/solr/" + COLLECTION).build()) {
+      ModifiableSolrParams queryParams = new ModifiableSolrParams();
+      queryParams.set("q", "id:" + docId);
+      QueryResponse response = client.query(queryParams);
+      assertTrue(response.getResults().getNumFound() >= 1,
+          "Document should exist in Solr");
+      Object titleValue = response.getResults().get(0).getFieldValue("title");
+      String title = titleValue instanceof Collection
+          ? ((Collection<?>) titleValue).iterator().next().toString()
+          : titleValue.toString();
+      assertEquals(expectedTitle, title);
+    }
+  }
+
+  @Override
+  public IndexWriter prepareWriterForDeleteTest() throws Exception {
+    tearDownIndexWriter();
+
+    Configuration deleteConf = NutchConfiguration.create();
+    deleteConf.setBoolean(IndexerMapReduce.INDEXER_DELETE, true);
+    SolrIndexWriter deleteWriter = new SolrIndexWriter();
+    deleteWriter.setConf(deleteConf);
+
+    String solrUrl = "http://" + solrContainer.getHost() + ":"
+        + solrContainer.getSolrPort() + "/solr/" + COLLECTION;
+    Map<String, String> params = new HashMap<>();
+    params.put(SolrConstants.SERVER_TYPE, "http");
+    params.put(SolrConstants.SERVER_URLS, solrUrl);
+    params.put(SolrConstants.COLLECTION, COLLECTION);
+    deleteWriter.open(new IndexWriterParams(params));
+
+    return deleteWriter;
+  }
+}
diff --git a/src/test/org/apache/nutch/indexer/AbstractIndexWriterIT.java 
b/src/test/org/apache/nutch/indexer/AbstractIndexWriterIT.java
new file mode 100644
index 000000000..b0bf6e023
--- /dev/null
+++ b/src/test/org/apache/nutch/indexer/AbstractIndexWriterIT.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexer;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+
+import org.apache.nutch.indexer.NutchDocument;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+/**
+ * Abstract base for IndexWriter integration tests. Provides common test logic
+ * for write/commit and delete operations.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public abstract class AbstractIndexWriterIT implements IndexWriterIntegrationTest {
+
+  @BeforeEach
+  void setUp() throws Exception {
+    setUpIndexWriter();
+  }
+
+  @AfterEach
+  void tearDown() throws Exception {
+    tearDownIndexWriter();
+  }
+
+  @Test
+  void testWriteAndCommitDocument() throws Exception {
+    NutchDocument doc = createTestDocument("test-doc-1", "Test Document",
+        "This is a test document for integration testing.");
+    assertDoesNotThrow(() -> getIndexWriter().write(doc));
+    assertDoesNotThrow(() -> getIndexWriter().commit());
+    tearDownIndexWriter();
+    verifyDocumentWritten("test-doc-1", "Test Document");
+  }
+
+  @Test
+  void testDeleteDocument() throws Exception {
+    if (!supportsDelete()) {
+      return;
+    }
+    String docId = "test-doc-to-delete";
+    NutchDocument doc = createTestDocument(docId, "Document to Delete", "");
+
+    IndexWriter writer = getIndexWriter();
+    writer.write(doc);
+    writer.commit();
+
+    IndexWriter deleteWriter = prepareWriterForDeleteTest();
+    if (deleteWriter == null) {
+      deleteWriter = writer;
+    }
+    final IndexWriter writerForDelete = deleteWriter;
+    assertDoesNotThrow(() -> writerForDelete.delete(docId));
+    assertDoesNotThrow(() -> writerForDelete.commit());
+    if (deleteWriter != writer) {
+      try {
+        deleteWriter.close();
+      } catch (Exception e) {
+        // Ignore
+      }
+    }
+  }
+
+  /** Create a NutchDocument with id, title, and content. */
+  protected NutchDocument createTestDocument(String id, String title, String content) {
+    NutchDocument doc = new NutchDocument();
+    doc.add("id", id);
+    doc.add("title", title);
+    doc.add("content", content);
+    return doc;
+  }
+}
diff --git a/src/test/org/apache/nutch/indexer/IndexWriterIntegrationTest.java 
b/src/test/org/apache/nutch/indexer/IndexWriterIntegrationTest.java
new file mode 100644
index 000000000..c6f1027da
--- /dev/null
+++ b/src/test/org/apache/nutch/indexer/IndexWriterIntegrationTest.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexer;
+
+/**
+ * Contract for IndexWriter integration tests. Implementations run against
+ * real backends via Testcontainers.
+ */
+public interface IndexWriterIntegrationTest {
+
+  /** Open the index writer before tests. */
+  void setUpIndexWriter() throws Exception;
+
+  /** Close the index writer after tests. */
+  void tearDownIndexWriter() throws Exception;
+
+  /** The IndexWriter under test. */
+  IndexWriter getIndexWriter();
+
+  /** Whether this writer supports document deletion (e.g. Kafka does not). */
+  boolean supportsDelete();
+
+  /**
+   * Optional verification that a document was indexed.
+   * Default no-op; override for Elastic, Solr.
+   */
+  default void verifyDocumentWritten(String docId, String expectedTitle) throws Exception {
+    // no-op
+  }
+
+  /**
+   * Optional writer configured for delete operations. Used when the main
+   * writer has delete disabled (e.g. Solr requires INDEXER_DELETE=true).
+   * Default returns null to use {@link #getIndexWriter()}.
+   */
+  default IndexWriter prepareWriterForDeleteTest() throws Exception {
+    return null;
+  }
+}

Reply via email to