This is an automated email from the ASF dual-hosted git repository.
lewismc pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new 64ac8b463 NUTCH-3154 Implement integration testing framework for Nutch
IndexWriter plugins using Testcontainers (#895)
64ac8b463 is described below
commit 64ac8b46347e6d836fc7c8ef693065742bf519e6
Author: Lewis John McGibbney <[email protected]>
AuthorDate: Sat Feb 21 08:42:16 2026 -0800
NUTCH-3154 Implement integration testing framework for Nutch IndexWriter
plugins using Testcontainers (#895)
---
.github/workflows/master-build.yml | 6 +
build.xml | 4 +
ivy/ivy.xml | 4 +
src/plugin/build-plugin.xml | 31 ++++-
src/plugin/build.xml | 11 ++
src/plugin/indexer-elastic/ivy.xml | 5 +-
.../indexwriter/elastic/ElasticIndexWriterIT.java | 113 +++++++++++++++++
src/plugin/indexer-kafka/ivy.xml | 1 +
.../indexwriter/kafka/KafkaIndexWriterIT.java | 96 +++++++++++++++
src/plugin/indexer-rabbit/ivy.xml | 6 +-
.../indexwriter/rabbit/RabbitIndexWriterIT.java | 90 ++++++++++++++
src/plugin/indexer-solr/ivy.xml | 1 +
.../nutch/indexwriter/solr/SolrIndexWriterIT.java | 137 +++++++++++++++++++++
.../nutch/indexer/AbstractIndexWriterIT.java | 90 ++++++++++++++
.../nutch/indexer/IndexWriterIntegrationTest.java | 53 ++++++++
15 files changed, 644 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/master-build.yml
b/.github/workflows/master-build.yml
index 1fe9da252..2056c1e10 100644
--- a/.github/workflows/master-build.yml
+++ b/.github/workflows/master-build.yml
@@ -204,6 +204,8 @@ jobs:
- 'src/testresources/**'
plugins:
- 'src/plugin/**'
+ indexer_plugins:
+ - 'src/plugin/indexer-*/**'
buildconf:
- 'build.xml'
- 'ivy/ivy.xml'
@@ -220,6 +222,10 @@ jobs:
- name: test plugins
if: ${{ steps.filter.outputs.plugins == 'true' &&
steps.filter.outputs.core == 'false' && steps.filter.outputs.buildconf ==
'false' }}
run: ant clean test-plugins -buildfile build.xml
+ # run indexer integration tests when indexer plugin files change
+ # (Docker required, ubuntu-latest only)
+ - name: test indexer integration
+ if: ${{ steps.filter.outputs.indexer_plugins == 'true' && matrix.os ==
'ubuntu-latest' }}
+ run: ant clean test-indexer-integration -buildfile build.xml
- name: Check for test results
id: check_tests
if: always() && matrix.os == 'ubuntu-latest'
diff --git a/build.xml b/build.xml
index 277225d24..57d44ee49 100644
--- a/build.xml
+++ b/build.xml
@@ -545,6 +545,10 @@
<ant dir="src/plugin" target="test-single" inheritAll="false"/>
</target>
+ <!-- Top level entry point for the indexer plugin integration tests: once the
+ targets listed in "depends" have run, delegates to the target of the same
+ name in src/plugin without passing this build's properties down. -->
+ <target name="test-indexer-integration" depends="resolve-test, compile,
compile-core-test, job" description="--> run indexer plugin integration tests
(Testcontainers)">
+ <ant dir="src/plugin" target="test-indexer-integration"
inheritAll="false"/>
+ </target>
+
<target name="nightly" depends="test, tar-src, zip-src" description="--> run
the nightly target build">
</target>
diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index 06e269bf5..b6b8f67a9 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -128,6 +128,10 @@
<dependency org="org.mockito" name="mockito-core" rev="5.18.0"
conf="test->default"/>
<dependency org="org.mockito" name="mockito-junit-jupiter" rev="5.18.0"
conf="test->default"/>
+ <!-- Testcontainers for indexer plugin integration tests -->
+ <!-- NOTE(review): the core artifact is at 2.0.3 while the JUnit Jupiter
+ extension below is pinned to 1.21.4 under the pre 2.0 artifact name; the
+ plugin ivy.xml files use "testcontainers-" prefixed modules at 2.0.3.
+ Presumably this should be testcontainers-junit-jupiter 2.0.3; confirm
+ against the Testcontainers 2.0 release notes. -->
+ <dependency org="org.testcontainers" name="testcontainers" rev="2.0.3"
conf="test->default"/>
+ <dependency org="org.testcontainers" name="junit-jupiter" rev="1.21.4"
conf="test->default"/>
+
<!-- Jetty used to serve test pages for unit tests, but is also
provided as dependency of Hadoop -->
<dependency org="org.eclipse.jetty" name="jetty-server"
rev="12.1.5" conf="test->default">
<exclude org="ch.qos.reload4j" module="*" />
diff --git a/src/plugin/build-plugin.xml b/src/plugin/build-plugin.xml
index f1787ed03..ef8dda56c 100755
--- a/src/plugin/build-plugin.xml
+++ b/src/plugin/build-plugin.xml
@@ -189,7 +189,7 @@
<!-- ================================================================== -->
<!-- Compile test code -->
<!-- ================================================================== -->
- <target name="compile-test" depends="compile, deps-test-compile"
if="test.available">
+ <target name="compile-test" depends="resolve-test, compile,
deps-test-compile" if="test.available">
<javac
encoding="${build.encoding}"
srcdir="${src.test}"
@@ -242,7 +242,34 @@
</testclasses>
</junitlauncher>
<fail if="tests.failed">Tests failed!</fail>
- </target>
+ </target>
+
+ <!-- ================================================================== -->
+ <!-- Run indexer plugin integration tests (Testcontainers) -->
+ <!-- Runs only when test.available is set. Forks one JVM per test class -->
+ <!-- and selects compiled classes named IT*, *IT or *IntegrationTest -->
+ <!-- under ${build.test}. Failures are recorded in -->
+ <!-- integration.tests.failed (haltOnFailure is false) and the build is -->
+ <!-- failed after the launcher has finished. -->
+ <!-- ================================================================== -->
+ <target name="test-indexer-integration" depends="compile-test, deploy"
if="test.available">
+ <echo message="Running indexer integration tests for plugin: ${name}"/>
+ <junitlauncher printSummary="true" haltOnFailure="false"
failureProperty="integration.tests.failed">
+ <classpath refid="test.classpath"/>
+ <testclasses outputDir="${build.test}">
+ <listener type="legacy-plain" sendSysOut="true" sendSysErr="true"/>
+ <listener type="legacy-xml" sendSysOut="true" sendSysErr="true"/>
+ <fork forkMode="perTestClass">
+ <jvmarg value="-Xmx2000m"/>
+ <sysproperty key="test.data" value="${build.test}/data"/>
+ <sysproperty key="test.input" value="${root}/data"/>
+ <!-- NOTE(review): Testcontainers documents enabling container reuse via
+ ~/.testcontainers.properties or the TESTCONTAINERS_REUSE_ENABLE
+ environment variable; confirm that a JVM system property is honoured,
+ otherwise this setting has no effect. -->
+ <sysproperty key="testcontainers.reuse.enable" value="true"/>
+ <sysproperty key="javax.xml.parsers.DocumentBuilderFactory"
value="com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl"/>
+ </fork>
+ <fileset dir="${build.test}">
+ <include name="**/IT*.class"/>
+ <include name="**/*IT.class"/>
+ <include name="**/*IntegrationTest.class"/>
+ </fileset>
+ </testclasses>
+ </junitlauncher>
+ <fail if="integration.tests.failed">Indexer integration tests
failed!</fail>
+ </target>
<!-- target: resolve ================================================= -->
<target name="resolve-default" depends="clean-lib" description="resolve and
retrieve dependencies with ivy">
diff --git a/src/plugin/build.xml b/src/plugin/build.xml
index b7a5a7721..18d00da3b 100755
--- a/src/plugin/build.xml
+++ b/src/plugin/build.xml
@@ -172,6 +172,17 @@
<ant dir="${plugin}" target="test"/>
</target>
+ <!-- ====================================================== -->
+ <!-- Indexer plugin integration tests (Testcontainers) -->
+ <!-- Run sequentially to avoid container resource contention -->
+ <!-- Covers the elastic, kafka, rabbit and solr indexer -->
+ <!-- plugins; each call runs that plugin's own -->
+ <!-- test-indexer-integration target (presumably the one -->
+ <!-- defined in build-plugin.xml; verify the plugin imports) -->
+ <!-- ====================================================== -->
+ <target name="test-indexer-integration">
+ <ant dir="indexer-elastic" target="test-indexer-integration"/>
+ <ant dir="indexer-kafka" target="test-indexer-integration"/>
+ <ant dir="indexer-rabbit" target="test-indexer-integration"/>
+ <ant dir="indexer-solr" target="test-indexer-integration"/>
+ </target>
+
<!-- ====================================================== -->
<!-- Clean all of the plugins. -->
<!-- ====================================================== -->
diff --git a/src/plugin/indexer-elastic/ivy.xml
b/src/plugin/indexer-elastic/ivy.xml
index ee812a225..04c1a071d 100644
--- a/src/plugin/indexer-elastic/ivy.xml
+++ b/src/plugin/indexer-elastic/ivy.xml
@@ -36,7 +36,10 @@
</publications>
<dependencies>
- <dependency org="org.elasticsearch.client"
name="elasticsearch-rest-high-level-client" rev="7.10.2"/>
+ <dependency org="org.testcontainers" name="testcontainers-elasticsearch"
rev="2.0.3" conf="test->default"/>
+ <dependency org="org.elasticsearch.client"
name="elasticsearch-rest-high-level-client" rev="7.10.2">
+ <exclude org="org.apache.logging.log4j" name="*"/>
+ </dependency>
<dependency org="org.apache.lucene" name="lucene-analyzers-common"
rev="8.11.2"/>
<dependency org="org.apache.lucene" name="lucene-backward-codecs"
rev="8.11.2"/>
<dependency org="org.apache.lucene" name="lucene-core" rev="8.11.2"/>
diff --git
a/src/plugin/indexer-elastic/src/test/org/apache/nutch/indexwriter/elastic/ElasticIndexWriterIT.java
b/src/plugin/indexer-elastic/src/test/org/apache/nutch/indexwriter/elastic/ElasticIndexWriterIT.java
new file mode 100644
index 000000000..0479213c3
--- /dev/null
+++
b/src/plugin/indexer-elastic/src/test/org/apache/nutch/indexwriter/elastic/ElasticIndexWriterIT.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.elastic;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.http.HttpHost;
+import org.apache.nutch.indexer.AbstractIndexWriterIT;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexWriterParams;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.util.NutchConfiguration;
+import org.elasticsearch.action.get.GetRequest;
+import org.elasticsearch.action.get.GetResponse;
+import org.elasticsearch.client.RequestOptions;
+import org.elasticsearch.client.RestClient;
+import org.elasticsearch.client.RestHighLevelClient;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.elasticsearch.ElasticsearchContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Integration tests for ElasticIndexWriter using Testcontainers.
+ * <p>
+ * A single-node Elasticsearch container with security disabled is declared as
+ * a static {@code @Container}; every test opens its own
+ * {@link ElasticIndexWriter} against it and verifies written documents with a
+ * separate REST client.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public class ElasticIndexWriterIT extends AbstractIndexWriterIT {
+
+  private static final String ELASTICSEARCH_IMAGE =
+      "docker.elastic.co/elasticsearch/elasticsearch:7.10.2";
+
+  /** Index the writer under test writes to and the verification client reads. */
+  private static final String INDEX_NAME = "test-index";
+
+  /** Elasticsearch HTTP port inside the container (mapped to a host port). */
+  private static final int HTTP_PORT = 9200;
+
+  /** Scheme used by both the writer and the verification client. */
+  private static final String HTTP_SCHEME = "http";
+
+  @Container
+  private static final ElasticsearchContainer elasticsearchContainer =
+      new ElasticsearchContainer(ELASTICSEARCH_IMAGE)
+          .withEnv("discovery.type", "single-node")
+          .withEnv("xpack.security.enabled", "false");
+
+  private ElasticIndexWriter indexWriter;
+
+  /**
+   * Creates a new writer and opens it against the container's mapped HTTP port.
+   *
+   * @throws Exception if the writer cannot be opened
+   */
+  @Override
+  public void setUpIndexWriter() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+    indexWriter = new ElasticIndexWriter();
+    indexWriter.setConf(conf);
+
+    Map<String, String> params = new HashMap<>();
+    params.put(ElasticConstants.HOSTS, elasticsearchContainer.getHost());
+    params.put(ElasticConstants.PORT,
+        String.valueOf(elasticsearchContainer.getMappedPort(HTTP_PORT)));
+    params.put(ElasticConstants.INDEX, INDEX_NAME);
+    params.put(ElasticConstants.SCHEME, HTTP_SCHEME);
+
+    indexWriter.open(new IndexWriterParams(params));
+  }
+
+  /**
+   * Closes the writer if one is open. Safe to call more than once; the base
+   * class calls it both inside a test and from its after-each hook.
+   */
+  @Override
+  public void tearDownIndexWriter() throws Exception {
+    if (indexWriter != null) {
+      try {
+        indexWriter.close();
+      } catch (Exception ignored) {
+        // Ignore if open() failed and close state is invalid
+      }
+      indexWriter = null;
+    }
+  }
+
+  @Override
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  @Override
+  public boolean supportsDelete() {
+    return true;
+  }
+
+  /**
+   * Fetches the document by id straight from Elasticsearch and checks that it
+   * exists and carries the expected title.
+   *
+   * @param docId id of the document that was written
+   * @param expectedTitle value expected in the {@code title} field
+   * @throws Exception if the lookup request fails
+   */
+  @Override
+  public void verifyDocumentWritten(String docId, String expectedTitle)
+      throws Exception {
+    try (RestHighLevelClient client = new RestHighLevelClient(
+        RestClient.builder(
+            new HttpHost(elasticsearchContainer.getHost(),
+                elasticsearchContainer.getMappedPort(HTTP_PORT),
+                HTTP_SCHEME)))) {
+      GetRequest getRequest = new GetRequest(INDEX_NAME, docId);
+      GetResponse getResponse = client.get(getRequest, RequestOptions.DEFAULT);
+      assertTrue(getResponse.isExists(), "Document should exist in index");
+      assertNotNull(getResponse.getSource());
+      assertEquals(expectedTitle, getResponse.getSource().get("title"));
+    }
+  }
+}
diff --git a/src/plugin/indexer-kafka/ivy.xml b/src/plugin/indexer-kafka/ivy.xml
index d6157d953..ffba6746d 100644
--- a/src/plugin/indexer-kafka/ivy.xml
+++ b/src/plugin/indexer-kafka/ivy.xml
@@ -37,6 +37,7 @@
</publications>
<dependencies>
+ <dependency org="org.testcontainers" name="testcontainers-kafka"
rev="2.0.3" conf="test->default"/>
<dependency org="org.apache.kafka" name="kafka_2.12" rev="3.7.0"/>
<dependency org="org.apache.kafka" name="connect-json" rev="3.7.0"/>
</dependencies>
diff --git
a/src/plugin/indexer-kafka/src/test/org/apache/nutch/indexwriter/kafka/KafkaIndexWriterIT.java
b/src/plugin/indexer-kafka/src/test/org/apache/nutch/indexwriter/kafka/KafkaIndexWriterIT.java
new file mode 100644
index 000000000..4f6a306d4
--- /dev/null
+++
b/src/plugin/indexer-kafka/src/test/org/apache/nutch/indexwriter/kafka/KafkaIndexWriterIT.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.kafka;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.indexer.AbstractIndexWriterIT;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexWriterParams;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.util.NutchConfiguration;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.kafka.KafkaContainer;
+
+/**
+ * Integration tests for KafkaIndexWriter using Testcontainers.
+ * <p>
+ * Documents are written to the {@code nutch-indexer-test} topic of a Kafka
+ * container. Delete is reported as unsupported, and no read-back verification
+ * is implemented, so the tests only check that write and commit do not throw.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public class KafkaIndexWriterIT extends AbstractIndexWriterIT {
+
+  private static final String KAFKA_IMAGE = "apache/kafka-native:3.8.0";
+  private static final String TEST_TOPIC = "nutch-indexer-test";
+
+  @Container
+  private static final KafkaContainer kafkaContainer =
+      new KafkaContainer(KAFKA_IMAGE);
+
+  private KafkaIndexWriter indexWriter;
+
+  /**
+   * Creates a new writer and opens it against the broker address advertised by
+   * the container.
+   *
+   * @throws IllegalStateException if the advertised address has no port
+   * @throws Exception if the writer cannot be opened
+   */
+  @Override
+  public void setUpIndexWriter() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+    indexWriter = new KafkaIndexWriter();
+    indexWriter.setConf(conf);
+
+    // The advertised value may carry a listener scheme ("PLAINTEXT://host:port")
+    // and could be a comma-separated list; use the first entry without scheme.
+    String bootstrapServers = kafkaContainer.getBootstrapServers();
+    String firstServer = bootstrapServers.split(",")[0].trim();
+    int schemeEnd = firstServer.indexOf("://");
+    String hostPort = schemeEnd >= 0
+        ? firstServer.substring(schemeEnd + 3)
+        : firstServer;
+    // Split on the last ':' so a host containing ':' cannot corrupt the port.
+    int portSeparator = hostPort.lastIndexOf(':');
+    if (portSeparator < 0) {
+      throw new IllegalStateException(
+          "Unexpected Kafka bootstrap servers value: " + bootstrapServers);
+    }
+    String host = hostPort.substring(0, portSeparator);
+    int port = Integer.parseInt(hostPort.substring(portSeparator + 1));
+
+    Map<String, String> params = new HashMap<>();
+    params.put(KafkaConstants.HOST, host);
+    params.put(KafkaConstants.PORT, String.valueOf(port));
+    params.put(KafkaConstants.TOPIC, TEST_TOPIC);
+    params.put(KafkaConstants.VALUE_SERIALIZER,
+        "org.apache.kafka.connect.json.JsonSerializer");
+    params.put(KafkaConstants.KEY_SERIALIZER,
+        "org.apache.kafka.common.serialization.StringSerializer");
+
+    indexWriter.open(new IndexWriterParams(params));
+  }
+
+  /**
+   * Closes the writer if one is open. Safe to call more than once; the base
+   * class calls it both inside a test and from its after-each hook.
+   */
+  @Override
+  public void tearDownIndexWriter() throws Exception {
+    if (indexWriter != null) {
+      try {
+        indexWriter.close();
+      } catch (Exception ignored) {
+        // Ignore if open() failed and close state is invalid
+      }
+      indexWriter = null;
+    }
+  }
+
+  @Override
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  @Override
+  public boolean supportsDelete() {
+    return false;
+  }
+}
diff --git a/src/plugin/indexer-rabbit/ivy.xml
b/src/plugin/indexer-rabbit/ivy.xml
index 81822a0fb..54930331c 100644
--- a/src/plugin/indexer-rabbit/ivy.xml
+++ b/src/plugin/indexer-rabbit/ivy.xml
@@ -35,5 +35,9 @@
<!--get the artifact from our module name-->
<artifact conf="master"/>
</publications>
-
+
+ <dependencies>
+ <dependency org="org.testcontainers" name="testcontainers-rabbitmq"
rev="2.0.3" conf="test->default"/>
+ </dependencies>
+
</ivy-module>
diff --git
a/src/plugin/indexer-rabbit/src/test/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriterIT.java
b/src/plugin/indexer-rabbit/src/test/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriterIT.java
new file mode 100644
index 000000000..ed7d05535
--- /dev/null
+++
b/src/plugin/indexer-rabbit/src/test/org/apache/nutch/indexwriter/rabbit/RabbitIndexWriterIT.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.rabbit;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.indexer.AbstractIndexWriterIT;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexWriterParams;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.util.NutchConfiguration;
+import org.testcontainers.containers.RabbitMQContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+/**
+ * Exercises RabbitIndexWriter against a RabbitMQ broker started through
+ * Testcontainers. The writer publishes to a durable direct exchange that is
+ * bound to a durable queue; no read-back verification is implemented.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public class RabbitIndexWriterIT extends AbstractIndexWriterIT {
+
+  private static final String RABBITMQ_IMAGE = "rabbitmq:3.13-management";
+
+  @Container
+  private static final RabbitMQContainer rabbitContainer =
+      new RabbitMQContainer(RABBITMQ_IMAGE);
+
+  private RabbitIndexWriter indexWriter;
+  private Configuration conf;
+
+  /** Builds the writer parameters and opens a new writer on the broker. */
+  @Override
+  public void setUpIndexWriter() throws Exception {
+    Map<String, String> settings = new HashMap<>();
+    settings.put(RabbitMQConstants.SERVER_URI, rabbitContainer.getAmqpUrl());
+    settings.put(RabbitMQConstants.EXCHANGE_NAME, "nutch-indexer-test");
+    settings.put(RabbitMQConstants.ROUTING_KEY, "indexer");
+    settings.put(RabbitMQConstants.COMMIT_MODE, "single");
+    settings.put(RabbitMQConstants.COMMIT_SIZE, "10");
+    settings.put(RabbitMQConstants.BINDING, "true");
+    settings.put(RabbitMQConstants.QUEUE_NAME, "nutch-indexer-queue");
+    settings.put(RabbitMQConstants.EXCHANGE_OPTIONS,
+        "type=direct,durable=true");
+    settings.put(RabbitMQConstants.QUEUE_OPTIONS,
+        "durable=true,exclusive=false,auto-delete=false");
+
+    conf = NutchConfiguration.create();
+    indexWriter = new RabbitIndexWriter();
+    indexWriter.setConf(conf);
+    indexWriter.open(new IndexWriterParams(settings));
+  }
+
+  /** Closes the current writer, if any, and forgets it. */
+  @Override
+  public void tearDownIndexWriter() throws Exception {
+    if (indexWriter == null) {
+      return;
+    }
+    try {
+      indexWriter.close();
+    } catch (Exception e) {
+      // Ignore if open() failed and close state is invalid
+    }
+    indexWriter = null;
+  }
+
+  @Override
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  @Override
+  public boolean supportsDelete() {
+    return true;
+  }
+}
diff --git a/src/plugin/indexer-solr/ivy.xml b/src/plugin/indexer-solr/ivy.xml
index 99a713c18..4d2120955 100644
--- a/src/plugin/indexer-solr/ivy.xml
+++ b/src/plugin/indexer-solr/ivy.xml
@@ -38,6 +38,7 @@
</publications>
<dependencies>
+ <dependency org="org.testcontainers" name="testcontainers-solr"
rev="2.0.3" conf="test->default"/>
<dependency org="org.apache.solr" name="solr-solrj"
rev="8.11.4" conf="*->default">
<!-- exclusions of dependencies provided by Nutch core
-->
diff --git
a/src/plugin/indexer-solr/src/test/org/apache/nutch/indexwriter/solr/SolrIndexWriterIT.java
b/src/plugin/indexer-solr/src/test/org/apache/nutch/indexwriter/solr/SolrIndexWriterIT.java
new file mode 100644
index 000000000..dcd88bdac
--- /dev/null
+++
b/src/plugin/indexer-solr/src/test/org/apache/nutch/indexwriter/solr/SolrIndexWriterIT.java
@@ -0,0 +1,137 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexwriter.solr;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.indexer.IndexerMapReduce;
+import org.apache.nutch.indexer.AbstractIndexWriterIT;
+import org.apache.nutch.indexer.IndexWriter;
+import org.apache.nutch.indexer.IndexWriterParams;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.util.NutchConfiguration;
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.impl.Http2SolrClient;
+import org.apache.solr.client.solrj.response.QueryResponse;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.solr.SolrContainer;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Integration tests for SolrIndexWriter using Testcontainers.
+ * <p>
+ * The writer used for indexing is configured with
+ * {@code IndexerMapReduce.INDEXER_DELETE} set to false; the delete test gets
+ * a second writer with that flag enabled from
+ * {@link #prepareWriterForDeleteTest()}.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public class SolrIndexWriterIT extends AbstractIndexWriterIT {
+
+  private static final String SOLR_IMAGE = "solr:8.11.2";
+  private static final String COLLECTION = "nutch-test";
+
+  @Container
+  private static final SolrContainer solrContainer =
+      new SolrContainer(SOLR_IMAGE).withCollection(COLLECTION);
+
+  private SolrIndexWriter indexWriter;
+
+  /** URL of the test collection on the running Solr container. */
+  private static String collectionUrl() {
+    return "http://" + solrContainer.getHost() + ":"
+        + solrContainer.getSolrPort() + "/solr/" + COLLECTION;
+  }
+
+  /** Connection parameters shared by the indexing and the delete writer. */
+  private static Map<String, String> connectionParams() {
+    Map<String, String> params = new HashMap<>();
+    params.put(SolrConstants.SERVER_TYPE, "http");
+    params.put(SolrConstants.SERVER_URLS, collectionUrl());
+    params.put(SolrConstants.COLLECTION, COLLECTION);
+    return params;
+  }
+
+  /**
+   * Creates a new writer with delete disabled and opens it against the test
+   * collection.
+   *
+   * @throws Exception if the writer cannot be opened
+   */
+  @Override
+  public void setUpIndexWriter() throws Exception {
+    Configuration conf = NutchConfiguration.create();
+    conf.setBoolean(IndexerMapReduce.INDEXER_DELETE, false);
+
+    indexWriter = new SolrIndexWriter();
+    indexWriter.setConf(conf);
+
+    Map<String, String> params = connectionParams();
+    params.put(SolrConstants.COMMIT_SIZE, "100");
+
+    indexWriter.open(new IndexWriterParams(params));
+  }
+
+  /**
+   * Closes the writer if one is open. Safe to call more than once; the base
+   * class calls it both inside a test and from its after-each hook.
+   */
+  @Override
+  public void tearDownIndexWriter() throws Exception {
+    if (indexWriter != null) {
+      try {
+        indexWriter.close();
+      } catch (Exception ignored) {
+        // Ignore if open() failed and close state is invalid
+      }
+      indexWriter = null;
+    }
+  }
+
+  @Override
+  public IndexWriter getIndexWriter() {
+    return indexWriter;
+  }
+
+  @Override
+  public boolean supportsDelete() {
+    return true;
+  }
+
+  /**
+   * Queries Solr for the document id and checks the title of the first hit.
+   * The title may come back as a single value or as a collection, in which
+   * case its first element is compared.
+   *
+   * @param docId id of the document that was written
+   * @param expectedTitle value expected in the {@code title} field
+   * @throws Exception if the query fails
+   */
+  @Override
+  public void verifyDocumentWritten(String docId, String expectedTitle)
+      throws Exception {
+    try (SolrClient client =
+        new Http2SolrClient.Builder(collectionUrl()).build()) {
+      ModifiableSolrParams queryParams = new ModifiableSolrParams();
+      queryParams.set("q", "id:" + docId);
+      QueryResponse response = client.query(queryParams);
+      assertTrue(response.getResults().getNumFound() >= 1,
+          "Document should exist in Solr");
+      Object titleValue = response.getResults().get(0).getFieldValue("title");
+      // Fail with a readable message rather than a NullPointerException.
+      assertTrue(titleValue != null, "Document should have a title field");
+      String title = titleValue instanceof Collection
+          ? ((Collection<?>) titleValue).iterator().next().toString()
+          : titleValue.toString();
+      assertEquals(expectedTitle, title);
+    }
+  }
+
+  /**
+   * Closes the indexing writer and returns a new, already opened writer whose
+   * configuration has {@code IndexerMapReduce.INDEXER_DELETE} enabled. The
+   * caller is responsible for closing the returned writer.
+   *
+   * @return writer configured for delete operations
+   * @throws Exception if the delete writer cannot be opened
+   */
+  @Override
+  public IndexWriter prepareWriterForDeleteTest() throws Exception {
+    tearDownIndexWriter();
+
+    Configuration deleteConf = NutchConfiguration.create();
+    deleteConf.setBoolean(IndexerMapReduce.INDEXER_DELETE, true);
+    SolrIndexWriter deleteWriter = new SolrIndexWriter();
+    deleteWriter.setConf(deleteConf);
+    deleteWriter.open(new IndexWriterParams(connectionParams()));
+
+    return deleteWriter;
+  }
+}
diff --git a/src/test/org/apache/nutch/indexer/AbstractIndexWriterIT.java
b/src/test/org/apache/nutch/indexer/AbstractIndexWriterIT.java
new file mode 100644
index 000000000..b0bf6e023
--- /dev/null
+++ b/src/test/org/apache/nutch/indexer/AbstractIndexWriterIT.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexer;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+
+import org.apache.nutch.indexer.NutchDocument;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+/**
+ * Abstract base for IndexWriter integration tests. Provides common test logic
+ * for write/commit and delete operations.
+ * <p>
+ * Subclasses supply the writer through {@link IndexWriterIntegrationTest}; the
+ * writer is set up before and torn down after every test method.
+ */
+@Testcontainers(disabledWithoutDocker = true)
+public abstract class AbstractIndexWriterIT
+    implements IndexWriterIntegrationTest {
+
+  @BeforeEach
+  void setUp() throws Exception {
+    setUpIndexWriter();
+  }
+
+  @AfterEach
+  void tearDown() throws Exception {
+    tearDownIndexWriter();
+  }
+
+  /**
+   * Writes and commits one document, closes the writer and then lets the
+   * subclass verify that the document reached the backend.
+   */
+  @Test
+  void testWriteAndCommitDocument() throws Exception {
+    NutchDocument doc = createTestDocument("test-doc-1", "Test Document",
+        "This is a test document for integration testing.");
+    assertDoesNotThrow(() -> getIndexWriter().write(doc));
+    assertDoesNotThrow(() -> getIndexWriter().commit());
+    // Closed before verification, presumably so buffered documents are
+    // flushed; tearDown() calls this again afterwards.
+    tearDownIndexWriter();
+    verifyDocumentWritten("test-doc-1", "Test Document");
+  }
+
+  /**
+   * Writes a document and deletes it again, using the dedicated delete writer
+   * when the subclass provides one. Skipped for writers without delete
+   * support.
+   */
+  @Test
+  void testDeleteDocument() throws Exception {
+    // Report unsupported writers as skipped instead of silently passing.
+    org.junit.jupiter.api.Assumptions.assumeTrue(supportsDelete(),
+        "IndexWriter under test does not support delete");
+
+    String docId = "test-doc-to-delete";
+    NutchDocument doc = createTestDocument(docId, "Document to Delete", "");
+
+    IndexWriter writer = getIndexWriter();
+    writer.write(doc);
+    writer.commit();
+
+    IndexWriter dedicatedWriter = prepareWriterForDeleteTest();
+    final IndexWriter writerForDelete =
+        dedicatedWriter != null ? dedicatedWriter : writer;
+    try {
+      assertDoesNotThrow(() -> writerForDelete.delete(docId));
+      assertDoesNotThrow(() -> writerForDelete.commit());
+    } finally {
+      // The main writer is closed by tearDown(); a dedicated delete writer is
+      // owned by this test and must be closed even when an assertion fails.
+      if (writerForDelete != writer) {
+        try {
+          writerForDelete.close();
+        } catch (Exception ignored) {
+          // Best-effort cleanup; a close failure must not mask the test result
+        }
+      }
+    }
+  }
+
+  /**
+   * Create a NutchDocument with id, title, and content.
+   *
+   * @param id value of the {@code id} field
+   * @param title value of the {@code title} field
+   * @param content value of the {@code content} field
+   * @return the populated document
+   */
+  protected NutchDocument createTestDocument(String id, String title,
+      String content) {
+    NutchDocument doc = new NutchDocument();
+    doc.add("id", id);
+    doc.add("title", title);
+    doc.add("content", content);
+    return doc;
+  }
+}
diff --git a/src/test/org/apache/nutch/indexer/IndexWriterIntegrationTest.java
b/src/test/org/apache/nutch/indexer/IndexWriterIntegrationTest.java
new file mode 100644
index 000000000..c6f1027da
--- /dev/null
+++ b/src/test/org/apache/nutch/indexer/IndexWriterIntegrationTest.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.indexer;
+
+/**
+ * Hooks an IndexWriter integration test has to provide so that the shared
+ * test logic can drive it. Implementations run against real backends via
+ * Testcontainers.
+ */
+public interface IndexWriterIntegrationTest {
+
+  /**
+   * Creates and opens the writer under test.
+   *
+   * @throws Exception if the writer cannot be opened
+   */
+  void setUpIndexWriter() throws Exception;
+
+  /**
+   * Closes the writer under test.
+   *
+   * @throws Exception if closing fails
+   */
+  void tearDownIndexWriter() throws Exception;
+
+  /**
+   * @return the IndexWriter under test
+   */
+  IndexWriter getIndexWriter();
+
+  /**
+   * @return true if the writer can delete documents (Kafka, for example,
+   *         cannot)
+   */
+  boolean supportsDelete();
+
+  /**
+   * Checks that a document reached the backend. Does nothing unless
+   * overridden; the Elastic and Solr tests override it.
+   *
+   * @param docId id of the document that was written
+   * @param expectedTitle title the stored document should have
+   * @throws Exception if the verification lookup fails
+   */
+  default void verifyDocumentWritten(String docId, String expectedTitle)
+      throws Exception {
+    // no-op
+  }
+
+  /**
+   * Supplies a writer set up for deleting, for backends whose main test
+   * writer has delete disabled (e.g. Solr requires INDEXER_DELETE=true).
+   *
+   * @return a writer to delete with, or null (the default) to use
+   *         {@link #getIndexWriter()}
+   * @throws Exception if the delete writer cannot be prepared
+   */
+  default IndexWriter prepareWriterForDeleteTest() throws Exception {
+    return null;
+  }
+}