This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch 1761
in repository https://gitbox.apache.org/repos/asf/stormcrawler.git

commit a0c36a9fe14d5a4f91b5a2d18add0f1e9bea55e8
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Dec 26 10:28:07 2025 +0100

    Fix #1761 - Add docker compose config to archetypes
---
 .../main/resources/archetype-resources/README.md   |  8 ++-
 .../archetype-resources/docker-compose.yml         | 54 +++++++++++++++
 docs/src/main/asciidoc/quick-start.adoc            |  2 +-
 .../main/resources/archetype-resources/README.md   | 14 ++++
 .../archetype-resources/docker-compose.yml         | 81 ++++++++++++++++++++++
 5 files changed, 157 insertions(+), 2 deletions(-)

diff --git a/archetype/src/main/resources/archetype-resources/README.md 
b/archetype/src/main/resources/archetype-resources/README.md
index 9f4fce32..920db4ec 100644
--- a/archetype/src/main/resources/archetype-resources/README.md
+++ b/archetype/src/main/resources/archetype-resources/README.md
@@ -3,8 +3,9 @@ Have a look at the code and resources and modify them to your 
heart's content.
 
 # Prerequisites
 
-You need to install Apache Storm. The instructions on [setting up a Storm 
cluster](https://storm.apache.org/releases/2.6.2/Setting-up-a-Storm-cluster.html)
 should help. 
+## Native
 
+You need to install Apache Storm. The instructions on [setting up a Storm 
cluster](https://storm.apache.org/releases/2.8.3/Setting-up-a-Storm-cluster.html)
 should help.
 You also need to have an instance of URLFrontier running. See [the URLFrontier 
README](https://github.com/crawler-commons/url-frontier/tree/master/service); 
the easiest way is to use Docker, like so:
 
 ```
@@ -12,6 +13,11 @@ docker pull crawlercommons/url-frontier
 docker run --rm --name frontier -p 7071:7071  crawlercommons/url-frontier
 ```
 
+## Docker Compose
+
+We provide a simple `docker-compose.yml` file to launch URLFrontier, 
Zookeeper, Storm Nimbus, Storm Supervisor, and the Storm UI.
+You may need to update `crawler-conf.yaml` to reference the URLFrontier host 
configuration (Docker container name).
+
 # Compilation
 
 Generate an uberjar with
diff --git 
a/archetype/src/main/resources/archetype-resources/docker-compose.yml 
b/archetype/src/main/resources/archetype-resources/docker-compose.yml
new file mode 100644
index 00000000..ae9cb8a6
--- /dev/null
+++ b/archetype/src/main/resources/archetype-resources/docker-compose.yml
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+services:
+  zookeeper:
+    image: zookeeper:3.9.3
+    container_name: zookeeper
+    restart: always
+
+  nimbus:
+    image: storm:latest
+    container_name: nimbus
+    hostname: nimbus
+    command: storm nimbus
+    depends_on:
+      - zookeeper
+    restart: always
+
+  supervisor:
+    image: storm:latest
+    container_name: supervisor
+    command: storm supervisor -c worker.childopts=-Xmx%HEAP-MEM%m
+    depends_on:
+      - nimbus
+      - zookeeper
+    restart: always
+
+  ui:
+    image: storm:latest
+    container_name: ui
+    command: storm ui
+    depends_on:
+      - nimbus
+    restart: always
+    ports:
+      - "127.0.0.1:8080:8080"
+
+  urlfrontier:
+    image: crawlercommons/url-frontier:latest
+    container_name: urlfrontier
+    restart: always
+    ports:
+      - "127.0.0.1:7071:7071"
\ No newline at end of file
diff --git a/docs/src/main/asciidoc/quick-start.adoc 
b/docs/src/main/asciidoc/quick-start.adoc
index b3f6f89b..6b661523 100644
--- a/docs/src/main/asciidoc/quick-start.adoc
+++ b/docs/src/main/asciidoc/quick-start.adoc
@@ -68,7 +68,7 @@ By exploring that part of the documentation, you can gain a 
better understanding
 
 ==== Docker Compose Setup
 
-Below is a simple `docker-compose.yaml` configuration to spin up URLFrontier, 
Zookeeper, Storm Nimbus, Storm Supervisor, and the Storm UI:
+Below is a simple `docker-compose.yml` configuration to spin up URLFrontier, 
Zookeeper, Storm Nimbus, Storm Supervisor, and the Storm UI:
 
 [source,yaml]
 ----
diff --git 
a/external/opensearch/archetype/src/main/resources/archetype-resources/README.md
 
b/external/opensearch/archetype/src/main/resources/archetype-resources/README.md
index 98825846..ddd7be94 100644
--- 
a/external/opensearch/archetype/src/main/resources/archetype-resources/README.md
+++ 
b/external/opensearch/archetype/src/main/resources/archetype-resources/README.md
@@ -1,14 +1,26 @@
 This has been generated by the StormCrawler Maven Archetype as a starting 
point for building your own crawler with [OpenSearch](https://opensearch.org/) 
as a backend.
 Have a look at the code and resources and modify them to your heart's content. 
 
+# Prerequisites
+
+## Native
 You need to have Apache Storm installed, as well as a running instance of 
OpenSearch.
 
+## Docker Compose
+
+We provide a simple `docker-compose.yml` file to launch OpenSearch, 
Zookeeper, Storm Nimbus, Storm Supervisor, and the Storm UI.
+You may need to update `opensearch-conf.yaml` to reference the OpenSearch host 
configuration (Docker container name).
+
+# Compilation
+
 First generate an uberjar:
 
 ``` sh
 mvn clean package
 ```
 
+# URL injection
+
 The first step consists in creating a file _seeds.txt_ in the current 
directory and populating it with the URLs 
 to be used as a starting point for the crawl, e.g. 
 
@@ -22,6 +34,8 @@ storm local target/${artifactId}-${version}.jar  
org.apache.storm.flux.Flux inje
 
 Note that in local mode, Flux uses a default TTL for the topology of 20 secs. 
The command above runs the topology for 1 hour.
 
+# Running the crawl
+
 To start crawling, run the following command
 
 ``` sh
diff --git 
a/external/opensearch/archetype/src/main/resources/archetype-resources/docker-compose.yml
 
b/external/opensearch/archetype/src/main/resources/archetype-resources/docker-compose.yml
new file mode 100644
index 00000000..ccad3cc4
--- /dev/null
+++ 
b/external/opensearch/archetype/src/main/resources/archetype-resources/docker-compose.yml
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+services:
+  zookeeper:
+    image: zookeeper:3.9.3
+    container_name: zookeeper
+    restart: always
+
+  nimbus:
+    image: storm:latest
+    container_name: nimbus
+    hostname: nimbus
+    command: storm nimbus
+    depends_on:
+      - zookeeper
+    restart: always
+
+  supervisor:
+    image: storm:latest
+    container_name: supervisor
+    command: storm supervisor -c worker.childopts=-Xmx%HEAP-MEM%m
+    depends_on:
+      - nimbus
+      - zookeeper
+    restart: always
+
+  ui:
+    image: storm:latest
+    container_name: ui
+    command: storm ui
+    depends_on:
+      - nimbus
+    restart: always
+    ports:
+      - "127.0.0.1:8080:8080"
+
+  opensearch-sc:
+    image: opensearchproject/opensearch:2.19.4
+    container_name: opensearch-sc
+    environment:
+      - cluster.name=opensearch-sc-cluster
+      - node.name=opensearch-sc
+      - discovery.type=single-node
+      - bootstrap.memory_lock=true # along with the memlock settings below, 
disables swapping
+      - "OPENSEARCH_JAVA_OPTS=-Xms4G -Xmx4G"
+      - plugins.security.disabled=true
+      - "DISABLE_INSTALL_DEMO_CONFIG=true"
+    volumes:
+      - opensearch-sc-data:/usr/share/opensearch/data
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+      nofile:
+        soft: 65536 # maximum number of open files for the OpenSearch user, 
set to at least 65536 on modern systems
+        hard: 65536
+    ports:
+      - "127.0.0.1:9200:9200" # REST API
+
+  opensearch-dashboard:
+    image: opensearchproject/opensearch-dashboards:2.19.4
+    container_name: dashboard
+    ports:
+      - "127.0.0.1:5601:5601"
+    expose:
+      - "5601"
+    environment:
+      - 'OPENSEARCH_HOSTS=["http://opensearch-sc:9200"]'
+      - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true" # disables security 
dashboards plugin in OpenSearch Dashboards

Reply via email to