This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch fix_issues_from_ipmc_release_vote
in repository https://gitbox.apache.org/repos/asf/incubator-stormcrawler.git

commit 60e195d3f189bef56b3e27667def15d8b033bfa8
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Nov 22 08:25:44 2024 +0100

    Remove references to digitalpebble.com (sitemaps, src issue refs)
---
 .../main/resources/archetype-resources/README.md   |  3 +-
 .../org/apache/stormcrawler/bolt/FetcherBolt.java  |  6 +-
 .../apache/stormcrawler/bolt/JSoupParserBolt.java  |  2 +-
 .../stormcrawler/bolt/SimpleFetcherBolt.java       |  4 +-
 .../filtering/basic/BasicURLNormalizer.java        |  2 +-
 .../filtering/regex/FastURLFilter.java             |  2 +-
 .../filtering/sitemap/SitemapFilter.java           |  2 +-
 .../persistence/AbstractStatusUpdaterBolt.java     |  2 +-
 .../stormcrawler/protocol/ProtocolResponse.java    |  2 +-
 .../stormcrawler/util/CharsetIdentification.java   |  2 +-
 .../stormcrawler/bolt/SiteMapParserBoltTest.java   | 20 +++---
 .../filtering/BasicURLNormalizerTest.java          |  2 +-
 .../stormcrawler/filtering/FastURLFilterTest.java  |  2 +-
 .../stormcrawler/indexer/BasicIndexingTest.java    |  2 +-
 .../stormcrawler/jsoup/JSoupFiltersTest.java       |  2 +-
 .../stormcrawler/parse/StackOverflowTest.java      |  4 +-
 .../parse/filter/SubDocumentsFilterTest.java       |  4 +-
 .../stormcrawler/parse/filter/XPathFilterTest.java |  2 +-
 .../protocol/DelegationProtocolTest.java           |  2 +-
 .../digitalpebble.sitemap.extensions.news.xml      | 69 ------------------
 .../digitalpebble.sitemap.extensions.video.xml     | 79 ---------------------
 core/src/test/resources/digitalpebble.sitemap.xml  | 57 ---------------
 core/src/test/resources/fast.urlfilter.json        |  2 +-
 ...xml => stormcrawler.sitemap.extensions.all.xml} | 54 ++++++++-------
 ...l => stormcrawler.sitemap.extensions.image.xml} | 54 ++++++++-------
 ...l => stormcrawler.sitemap.extensions.links.xml} | 63 +++++++++--------
 ... => stormcrawler.sitemap.extensions.mobile.xml} | 63 +++++++++--------
 .../stormcrawler.sitemap.extensions.news.xml       | 70 +++++++++++++++++++
 .../stormcrawler.sitemap.extensions.video.xml      | 81 ++++++++++++++++++++++
 core/src/test/resources/stormcrawler.sitemap.xml   | 60 ++++++++++++++++
 .../stormcrawler/opensearch/bolt/DeletionBolt.java |  2 +-
 .../stormcrawler/opensearch/bolt/IndexerBolt.java  |  2 +-
 .../opensearch/persistence/StatusUpdaterBolt.java  |  2 +-
 .../opensearch/bolt/IndexerBoltTest.java           |  2 +-
 .../opensearch/bolt/StatusBoltTest.java            |  2 +-
 .../apache/stormcrawler/tika/ParserBoltTest.java   |  2 +-
 .../urlfrontier/ManagedChannelUtil.java            |  2 +-
 .../stormcrawler/warc/WARCRequestRecordFormat.java |  2 +-
 38 files changed, 375 insertions(+), 360 deletions(-)

diff --git a/archetype/src/main/resources/archetype-resources/README.md 
b/archetype/src/main/resources/archetype-resources/README.md
index e973f08f..9f4fce32 100644
--- a/archetype/src/main/resources/archetype-resources/README.md
+++ b/archetype/src/main/resources/archetype-resources/README.md
@@ -3,8 +3,7 @@ Have a look at the code and resources and modify them to your 
heart's content.
 
 # Prerequisites
 
-You need to install Apache Storm. The instructions on [setting up a Storm 
cluster](https://storm.apache.org/releases/2.6.2/Setting-up-a-Storm-cluster.html)
 should help. Alternatively, 
-the 
[stormcrawler-docker](https://github.com/DigitalPebble/stormcrawler-docker) 
project contains resources for running Apache Storm on Docker. 
+You need to install Apache Storm. The instructions on [setting up a Storm 
cluster](https://storm.apache.org/releases/2.6.2/Setting-up-a-Storm-cluster.html)
 should help. 
 
 You also need to have an instance of URLFrontier running. See [the URLFrontier 
README](https://github.com/crawler-commons/url-frontier/tree/master/service); 
the easiest way is to use Docker, like so:
 
diff --git a/core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java
index b4da630a..3f1477d1 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java
@@ -509,7 +509,7 @@ public class FetcherBolt extends StatusEmitterBolt {
                     metadata = new Metadata();
                 }
 
-                // https://github.com/DigitalPebble/storm-crawler/issues/813
+                // https://github.com/apache/incubator-stormcrawler/issues/813
                 metadata.remove("fetch.exception");
 
                 boolean asap = false;
@@ -568,7 +568,7 @@ public class FetcherBolt extends StatusEmitterBolt {
                     }
 
                     // has found sitemaps
-                    // 
https://github.com/DigitalPebble/storm-crawler/issues/710
+                    // 
https://github.com/apache/incubator-stormcrawler/issues/710
                     // note: we don't care if the sitemap URLs where actually
                     // kept
                     boolean foundSitemap = (rules.getSitemaps().size() > 0);
@@ -732,7 +732,7 @@ public class FetcherBolt extends StatusEmitterBolt {
                             mergedMD.setValue("_redirTo", redirection);
                         }
 
-                        // 
https://github.com/DigitalPebble/storm-crawler/issues/954
+                        // 
https://github.com/apache/incubator-stormcrawler/issues/954
                         if (allowRedirs() && 
StringUtils.isNotBlank(redirection)) {
                             emitOutlink(fit.t, url, redirection, mergedMD);
                         }
diff --git 
a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java
index 015403d0..17214a4d 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java
@@ -347,7 +347,7 @@ public class JSoupParserBolt extends StatusEmitterBolt {
                     LOG.info("Found redir in {} to {}", url, redirection);
                     metadata.setValue("_redirTo", redirection);
 
-                    // 
https://github.com/DigitalPebble/storm-crawler/issues/954
+                    // 
https://github.com/apache/incubator-stormcrawler/issues/954
                     if (allowRedirs() && StringUtils.isNotBlank(redirection)) {
                         emitOutlink(tuple, new URL(url), redirection, 
metadata);
                     }
diff --git 
a/core/src/main/java/org/apache/stormcrawler/bolt/SimpleFetcherBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/SimpleFetcherBolt.java
index 7c5ccfcc..0f783d78 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/SimpleFetcherBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/SimpleFetcherBolt.java
@@ -256,7 +256,7 @@ public class SimpleFetcherBolt extends StatusEmitterBolt {
             metadata = new Metadata();
         }
 
-        // https://github.com/DigitalPebble/storm-crawler/issues/813
+        // https://github.com/apache/incubator-stormcrawler/issues/813
         metadata.remove("fetch.exception");
 
         URL url;
@@ -326,7 +326,7 @@ public class SimpleFetcherBolt extends StatusEmitterBolt {
             }
 
             // has found sitemaps
-            // https://github.com/DigitalPebble/storm-crawler/issues/710
+            // https://github.com/apache/incubator-stormcrawler/issues/710
             // note: we don't care if the sitemap URLs where actually
             // kept
             boolean foundSitemap = (rules.getSitemaps().size() > 0);
diff --git 
a/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java
 
b/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java
index 7550327c..629bc976 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java
@@ -50,7 +50,7 @@ public class BasicURLNormalizer extends URLFilter {
     /** Nutch 1098 - finds URL encoded parts of the URL */
     private static final Pattern unescapeRulePattern = 
Pattern.compile("%([0-9A-Fa-f]{2})");
 
-    /** https://github.com/DigitalPebble/storm-crawler/issues/401 * */
+    /** https://github.com/apache/incubator-stormcrawler/issues/401 * */
     private static final Pattern illegalEscapePattern = 
Pattern.compile("%u([0-9A-Fa-f]{4})");
 
     // charset used for encoding URLs before escaping
diff --git 
a/core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java 
b/core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java
index 671b9c0f..50f528f2 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java
@@ -112,7 +112,7 @@ public class FastURLFilter extends URLFilter implements 
JSONResource {
 
         // if it contains a single object
         // jump directly to its content
-        // https://github.com/DigitalPebble/storm-crawler/issues/1013
+        // https://github.com/apache/incubator-stormcrawler/issues/1013
         if (rootNode.size() == 1 && rootNode.isObject()) {
             rootNode = rootNode.fields().next().getValue();
         }
diff --git 
a/core/src/main/java/org/apache/stormcrawler/filtering/sitemap/SitemapFilter.java
 
b/core/src/main/java/org/apache/stormcrawler/filtering/sitemap/SitemapFilter.java
index 6670663e..5beec278 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/filtering/sitemap/SitemapFilter.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/filtering/sitemap/SitemapFilter.java
@@ -36,7 +36,7 @@ import org.jetbrains.annotations.Nullable;
  * </pre>
  *
  * Will be replaced by <a href=
- * "https://github.com/DigitalPebble/storm-crawler/issues/711">MetadataFilter 
to filter based on
+ * 
"https://github.com/apache/incubator-stormcrawler/issues/711">MetadataFilter to 
filter based on
  * multiple key values</a>
  *
  * @since 1.14
diff --git 
a/core/src/main/java/org/apache/stormcrawler/persistence/AbstractStatusUpdaterBolt.java
 
b/core/src/main/java/org/apache/stormcrawler/persistence/AbstractStatusUpdaterBolt.java
index 04bf9bfe..44d7a89f 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/persistence/AbstractStatusUpdaterBolt.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/persistence/AbstractStatusUpdaterBolt.java
@@ -207,7 +207,7 @@ public abstract class AbstractStatusUpdaterBolt extends 
BaseRichBolt {
         if (!status.equals(Status.FETCH_ERROR)) {
             metadata.remove(Constants.fetchErrorCountParamName);
         }
-        // https://github.com/DigitalPebble/storm-crawler/issues/415
+        // https://github.com/apache/incubator-stormcrawler/issues/415
         // remove error related key values in case of success
         if (status.equals(Status.FETCHED) || 
status.equals(Status.REDIRECTION)) {
             metadata.remove(Constants.STATUS_ERROR_CAUSE);
diff --git 
a/core/src/main/java/org/apache/stormcrawler/protocol/ProtocolResponse.java 
b/core/src/main/java/org/apache/stormcrawler/protocol/ProtocolResponse.java
index f997957f..b79163d8 100644
--- a/core/src/main/java/org/apache/stormcrawler/protocol/ProtocolResponse.java
+++ b/core/src/main/java/org/apache/stormcrawler/protocol/ProtocolResponse.java
@@ -58,7 +58,7 @@ public class ProtocolResponse {
 
     /**
      * @since 1.17
-     * @see <a 
href="https://github.com/DigitalPebble/storm-crawler/issues/776">Issue 776</a>
+     * @see <a 
href="https://github.com/apache/incubator-stormcrawler/issues/776">Issue 776</a>
      */
     public static final String PROTOCOL_MD_PREFIX_PARAM = "protocol.md.prefix";
 
diff --git 
a/core/src/main/java/org/apache/stormcrawler/util/CharsetIdentification.java 
b/core/src/main/java/org/apache/stormcrawler/util/CharsetIdentification.java
index b9a767a9..1ef8a712 100644
--- a/core/src/main/java/org/apache/stormcrawler/util/CharsetIdentification.java
+++ b/core/src/main/java/org/apache/stormcrawler/util/CharsetIdentification.java
@@ -186,7 +186,7 @@ public class CharsetIdentification {
         int start = html.indexOf("<meta charset=\"");
         if (start != -1) {
             int end = html.indexOf('"', start + 15);
-            // https://github.com/DigitalPebble/storm-crawler/issues/870
+            // https://github.com/apache/incubator-stormcrawler/issues/870
             // try on a slightly larger section of text if it is trimmed
             if (end == -1 && ((maxlength + 10) < buffer.length)) {
                 return getCharsetFromMeta(buffer, maxlength + 10);
diff --git 
a/core/src/test/java/org/apache/stormcrawler/bolt/SiteMapParserBoltTest.java 
b/core/src/test/java/org/apache/stormcrawler/bolt/SiteMapParserBoltTest.java
index d96ce5f6..de8d7778 100644
--- a/core/src/test/java/org/apache/stormcrawler/bolt/SiteMapParserBoltTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/bolt/SiteMapParserBoltTest.java
@@ -52,8 +52,8 @@ class SiteMapParserBoltTest extends ParsingTester {
         metadata.setValue(SiteMapParserBolt.isSitemapKey, "true");
         // and its mime-type
         metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
-        parse("http://stormcrawler.apache.org/sitemap.xml", 
"digitalpebble.sitemap.xml", metadata);
-        Assertions.assertEquals(6, 
output.getEmitted(Constants.StatusStreamName).size());
+        parse("http://stormcrawler.apache.org/sitemap.xml", 
"stormcrawler.sitemap.xml", metadata);
+        Assertions.assertEquals(7, 
output.getEmitted(Constants.StatusStreamName).size());
         // TODO test that the new links have the right metadata
         List<Object> fields = 
output.getEmitted(Constants.StatusStreamName).get(0);
         Assertions.assertEquals(3, fields.size());
@@ -101,7 +101,7 @@ class SiteMapParserBoltTest extends ParsingTester {
         metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
         parse(
                 "http://stormcrawler.apache.org/sitemap.xml",
-                "digitalpebble.sitemap.extensions.image.xml",
+                "stormcrawler.sitemap.extensions.image.xml",
                 metadata);
         Values values = (Values) 
output.getEmitted(Constants.StatusStreamName).get(0);
         Metadata parsedMetadata = (Metadata) values.get(1);
@@ -120,7 +120,7 @@ class SiteMapParserBoltTest extends ParsingTester {
         metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
         parse(
                 "http://stormcrawler.apache.org/sitemap.xml",
-                "digitalpebble.sitemap.extensions.mobile.xml",
+                "stormcrawler.sitemap.extensions.mobile.xml",
                 metadata);
         Values values = (Values) 
output.getEmitted(Constants.StatusStreamName).get(0);
         Metadata parsedMetadata = (Metadata) values.get(1);
@@ -139,7 +139,7 @@ class SiteMapParserBoltTest extends ParsingTester {
         metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
         parse(
                 "http://stormcrawler.apache.org/sitemap.xml",
-                "digitalpebble.sitemap.extensions.links.xml",
+                "stormcrawler.sitemap.extensions.links.xml",
                 metadata);
         Values values = (Values) 
output.getEmitted(Constants.StatusStreamName).get(0);
         Metadata parsedMetadata = (Metadata) values.get(1);
@@ -158,7 +158,7 @@ class SiteMapParserBoltTest extends ParsingTester {
         metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
         parse(
                 "http://stormcrawler.apache.org/sitemap.xml",
-                "digitalpebble.sitemap.extensions.news.xml",
+                "stormcrawler.sitemap.extensions.news.xml",
                 metadata);
         Values values = (Values) 
output.getEmitted(Constants.StatusStreamName).get(0);
         Metadata parsedMetadata = (Metadata) values.get(1);
@@ -177,7 +177,7 @@ class SiteMapParserBoltTest extends ParsingTester {
         metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
         parse(
                 "http://stormcrawler.apache.org/sitemap.xml",
-                "digitalpebble.sitemap.extensions.video.xml",
+                "stormcrawler.sitemap.extensions.video.xml",
                 metadata);
         Values values = (Values) 
output.getEmitted(Constants.StatusStreamName).get(0);
         Metadata parsedMetadata = (Metadata) values.get(1);
@@ -203,7 +203,7 @@ class SiteMapParserBoltTest extends ParsingTester {
         metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
         parse(
                 "http://stormcrawler.apache.org/sitemap.xml",
-                "digitalpebble.sitemap.extensions.all.xml",
+                "stormcrawler.sitemap.extensions.all.xml",
                 metadata);
         Values values = (Values) 
output.getEmitted(Constants.StatusStreamName).get(0);
         Metadata parsedMetadata = (Metadata) values.get(1);
@@ -237,8 +237,8 @@ class SiteMapParserBoltTest extends ParsingTester {
         Metadata metadata = new Metadata();
         // do not specify that it is a sitemap file
         // do not set the mimetype
-        parse("http://stormcrawler.apache.org/sitemap.xml", 
"digitalpebble.sitemap.xml", metadata);
-        Assertions.assertEquals(6, 
output.getEmitted(Constants.StatusStreamName).size());
+        parse("http://stormcrawler.apache.org/sitemap.xml", 
"stormcrawler.sitemap.xml", metadata);
+        Assertions.assertEquals(7, 
output.getEmitted(Constants.StatusStreamName).size());
         // TODO test that the new links have the right metadata
         List<Object> fields = 
output.getEmitted(Constants.StatusStreamName).get(0);
         Assertions.assertEquals(3, fields.size());
diff --git 
a/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java
 
b/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java
index b9594cc3..250ea401 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java
@@ -289,7 +289,7 @@ class BasicURLNormalizerTest {
         assertEquals(expectedResult, normalizedUrl, "Failed to filter query 
string");
     }
 
-    // https://github.com/DigitalPebble/storm-crawler/issues/401
+    // https://github.com/apache/incubator-stormcrawler/issues/401
     @Test
     void testNonStandardPercentEncoding() throws MalformedURLException {
         URLFilter urlFilter = createFilter(false, false);
diff --git 
a/core/src/test/java/org/apache/stormcrawler/filtering/FastURLFilterTest.java 
b/core/src/test/java/org/apache/stormcrawler/filtering/FastURLFilterTest.java
index fdf68fb3..4ea88b5b 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/filtering/FastURLFilterTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/filtering/FastURLFilterTest.java
@@ -53,7 +53,7 @@ class FastURLFilterTest {
         String filterResult = createFilter().filter(url, metadata, 
url.toExternalForm());
         Assertions.assertEquals(null, filterResult);
         // allowed
-        url = new URL("http://stormcrawler.net/digitalpebble/");
+        url = new URL("http://stormcrawler.net/bla/");
         filterResult = createFilter().filter(url, metadata, 
url.toExternalForm());
         Assertions.assertEquals(url.toString(), filterResult);
     }
diff --git 
a/core/src/test/java/org/apache/stormcrawler/indexer/BasicIndexingTest.java 
b/core/src/test/java/org/apache/stormcrawler/indexer/BasicIndexingTest.java
index 9b73fc26..d00bd4a6 100644
--- a/core/src/test/java/org/apache/stormcrawler/indexer/BasicIndexingTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/indexer/BasicIndexingTest.java
@@ -87,7 +87,7 @@ class BasicIndexingTest extends IndexerTester {
         config.put(AbstractIndexerBolt.urlFieldParamName, "url");
         config.put(AbstractIndexerBolt.canonicalMetadataParamName, 
"canonical");
         Metadata metadata = new Metadata();
-        metadata.setValue("canonical", "htp://www.digitalpebble.com/");
+        metadata.setValue("canonical", "htp://stormcrawler.apache.org/");
         prepareIndexerBolt(config);
         index(URL, metadata);
         Map<String, String> fields = ((DummyIndexer) bolt).returnFields();
diff --git 
a/core/src/test/java/org/apache/stormcrawler/jsoup/JSoupFiltersTest.java 
b/core/src/test/java/org/apache/stormcrawler/jsoup/JSoupFiltersTest.java
index de433d1c..33f96dbb 100644
--- a/core/src/test/java/org/apache/stormcrawler/jsoup/JSoupFiltersTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/jsoup/JSoupFiltersTest.java
@@ -58,7 +58,7 @@ class JSoupFiltersTest extends ParsingTester {
     }
 
     @Test
-    // https://github.com/DigitalPebble/storm-crawler/issues/219
+    // https://github.com/apache/incubator-stormcrawler/issues/219
     void testScriptExtraction() throws IOException {
         prepareParserBolt("test.jsoupfilters.json");
         parse("http://stormcrawler.apache.org", 
"stormcrawler.apache.org.html");
diff --git 
a/core/src/test/java/org/apache/stormcrawler/parse/StackOverflowTest.java 
b/core/src/test/java/org/apache/stormcrawler/parse/StackOverflowTest.java
index 3a0a3956..02abfab5 100644
--- a/core/src/test/java/org/apache/stormcrawler/parse/StackOverflowTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/parse/StackOverflowTest.java
@@ -28,7 +28,7 @@ import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
 /**
- * @see https://github.com/DigitalPebble/storm-crawler/pull/653 *
+ * @see https://github.com/apache/incubator-stormcrawler/pull/653 *
  */
 class StackOverflowTest extends ParsingTester {
 
@@ -47,7 +47,7 @@ class StackOverflowTest extends ParsingTester {
     }
 
     /**
-     * @see https://github.com/DigitalPebble/storm-crawler/issues/666 *
+     * @see https://github.com/apache/incubator-stormcrawler/issues/666 *
      */
     @Test
     void testNamespaceExtraction() throws IOException {
diff --git 
a/core/src/test/java/org/apache/stormcrawler/parse/filter/SubDocumentsFilterTest.java
 
b/core/src/test/java/org/apache/stormcrawler/parse/filter/SubDocumentsFilterTest.java
index 408d8503..f74c34a6 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/parse/filter/SubDocumentsFilterTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/parse/filter/SubDocumentsFilterTest.java
@@ -40,7 +40,7 @@ class SubDocumentsFilterTest extends ParsingTester {
         config.put("detect.mimetype", false);
         prepareParserBolt("test.subdocfilter.json", config);
         Metadata metadata = new Metadata();
-        parse("http://stormcrawler.apache.org/sitemap.xml", 
"digitalpebble.sitemap.xml", metadata);
-        Assertions.assertEquals(6, output.getEmitted().size());
+        parse("http://stormcrawler.apache.org/sitemap.xml", 
"stormcrawler.sitemap.xml", metadata);
+        Assertions.assertEquals(7, output.getEmitted().size());
     }
 }
diff --git 
a/core/src/test/java/org/apache/stormcrawler/parse/filter/XPathFilterTest.java 
b/core/src/test/java/org/apache/stormcrawler/parse/filter/XPathFilterTest.java
index a15e0833..7a8077f3 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/parse/filter/XPathFilterTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/parse/filter/XPathFilterTest.java
@@ -48,7 +48,7 @@ class XPathFilterTest extends ParsingTester {
     }
 
     @Test
-    // https://github.com/DigitalPebble/storm-crawler/issues/219
+    // https://github.com/apache/incubator-stormcrawler/issues/219
     void testScriptExtraction() throws IOException {
         prepareParserBolt("test.parsefilters.json");
         parse("http://stormcrawler.apache.org", 
"stormcrawler.apache.org.html");
diff --git 
a/core/src/test/java/org/apache/stormcrawler/protocol/DelegationProtocolTest.java
 
b/core/src/test/java/org/apache/stormcrawler/protocol/DelegationProtocolTest.java
index 9a706829..a4d25cb6 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/protocol/DelegationProtocolTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/protocol/DelegationProtocolTest.java
@@ -40,7 +40,7 @@ class DelegationProtocolTest {
         // try single filter
         Metadata meta = new Metadata();
         meta.setValue("js", "true");
-        FilteredProtocol pf = 
superProto.getProtocolFor("https://digitalpebble.com", meta);
+        FilteredProtocol pf = 
superProto.getProtocolFor("https://stormcrawler.apache.org", meta);
         Assertions.assertEquals(pf.id, "second");
         // no filter at all
         meta = new Metadata();
diff --git a/core/src/test/resources/digitalpebble.sitemap.extensions.news.xml 
b/core/src/test/resources/digitalpebble.sitemap.extensions.news.xml
deleted file mode 100644
index 9243b66b..00000000
--- a/core/src/test/resources/digitalpebble.sitemap.extensions.news.xml
+++ /dev/null
@@ -1,69 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<urlset
-      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
-      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
-            http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
-      xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">
-<!-- created with Free Online Sitemap Generator www.xml-sitemaps.com -->
-
-<url>
-  <loc>http://digitalpebble.com/</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>1.00</priority>
-  <news:news>
-    <news:publication>
-      <news:name>The Example Times</news:name>
-      <news:language>en</news:language>
-    </news:publication>
-    <news:genres>PressRelease, Blog</news:genres>
-    <news:publication_date>2008-12-23</news:publication_date>
-    <news:title>Companies A, B in Merger Talks</news:title>
-    <news:keywords>business, merger, acquisition, A, B</news:keywords>
-    <news:stock_tickers>NASDAQ:A, NASDAQ:B</news:stock_tickers>
-  </news:news>
-</url>
-<url>
-  <loc>http://digitalpebble.com/index.html</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/solutions.html</loc>
-  <lastmod>2012-09-06T16:53:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/references.html</loc>
-  <lastmod>2014-04-16T14:40:10+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/contact.html</loc>
-  <lastmod>2012-12-05T10:59:00+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-</urlset>
diff --git a/core/src/test/resources/digitalpebble.sitemap.extensions.video.xml 
b/core/src/test/resources/digitalpebble.sitemap.extensions.video.xml
deleted file mode 100644
index 20a6a792..00000000
--- a/core/src/test/resources/digitalpebble.sitemap.extensions.video.xml
+++ /dev/null
@@ -1,79 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<urlset
-      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
-      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
-            http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
-      xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
-<!-- created with Free Online Sitemap Generator www.xml-sitemaps.com -->
-
-<url>
-  <loc>http://digitalpebble.com/</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>1.00</priority>
-  <video:video>
-    
<video:thumbnail_loc>http://www.example.com/thumbs/123.jpg</video:thumbnail_loc>
-    <video:title>Grilling steaks for summer</video:title>
-    <video:description>Alkis shows you how to get perfectly done steaks every 
time</video:description>
-    <video:content_loc>http://www.example.com/video123.flv</video:content_loc>
-    <video:player_loc allow_embed="yes" 
autoplay="ap=1">http://www.example.com/videoplayer.swf?video=123</video:player_loc>
-    <video:duration>600</video:duration>
-    <video:expiration_date>2009-11-05T19:20:30+08:00</video:expiration_date>
-    <video:rating>4.2</video:rating>
-    <video:view_count>12345</video:view_count>
-    <video:publication_date>2007-11-05T19:20:30+08:00</video:publication_date>
-    <video:tag>sample_tag1</video:tag>
-    <video:tag>sample_tag2</video:tag>
-    <video:family_friendly>yes</video:family_friendly>
-    <video:restriction relationship="allow">IE GB US CA</video:restriction>
-    <video:gallery_loc title="Cooking 
Videos">http://cooking.example.com</video:gallery_loc>
-    <video:price currency="EUR">1.99</video:price>
-    <video:requires_subscription>yes</video:requires_subscription>
-    <video:uploader 
info="http://www.example.com/users/grillymcgrillerson">GrillyMcGrillerson</video:uploader>
-    <video:live>no</video:live>
-  </video:video>
-</url>
-<url>
-  <loc>http://digitalpebble.com/index.html</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/solutions.html</loc>
-  <lastmod>2012-09-06T16:53:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/references.html</loc>
-  <lastmod>2014-04-16T14:40:10+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/contact.html</loc>
-  <lastmod>2012-12-05T10:59:00+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-</urlset>
diff --git a/core/src/test/resources/digitalpebble.sitemap.xml 
b/core/src/test/resources/digitalpebble.sitemap.xml
deleted file mode 100644
index 09cea4ba..00000000
--- a/core/src/test/resources/digitalpebble.sitemap.xml
+++ /dev/null
@@ -1,57 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements.  See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership.  The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License.  You may obtain a copy of the License at
-
-  http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied.  See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-<urlset
-      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
-      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
-            http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
-<!-- created with Free Online Sitemap Generator www.xml-sitemaps.com -->
-
-<url>
-  <loc>http://digitalpebble.com/</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>1.00</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/index.html</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/solutions.html</loc>
-  <lastmod>2012-09-06T16:53:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/references.html</loc>
-  <lastmod>2014-04-16T14:40:10+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/contact.html</loc>
-  <lastmod>2012-12-05T10:59:00+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-</urlset>
diff --git a/core/src/test/resources/fast.urlfilter.json 
b/core/src/test/resources/fast.urlfilter.json
index d51953b8..866d8c62 100644
--- a/core/src/test/resources/fast.urlfilter.json
+++ b/core/src/test/resources/fast.urlfilter.json
@@ -4,7 +4,7 @@
     "patterns" : [ "DenyPathQuery \\.jpg" ]
   }, {
     "scope" : "domain:stormcrawler.net",
-    "patterns" : [ "AllowPath /digitalpebble/", "DenyPath .+" ]
+    "patterns" : [ "AllowPath /bla/", "DenyPath .+" ]
   }, {
     "scope" : "metadata:key=value",
     "patterns" : [ "DenyPath .+" ]
diff --git a/core/src/test/resources/digitalpebble.sitemap.extensions.all.xml 
b/core/src/test/resources/stormcrawler.sitemap.extensions.all.xml
similarity index 80%
rename from core/src/test/resources/digitalpebble.sitemap.extensions.all.xml
rename to core/src/test/resources/stormcrawler.sitemap.extensions.all.xml
index af3f14c7..6958b115 100644
--- a/core/src/test/resources/digitalpebble.sitemap.extensions.all.xml
+++ b/core/src/test/resources/stormcrawler.sitemap.extensions.all.xml
@@ -76,28 +76,34 @@ under the License.
     <video:live>no</video:live>
   </video:video>
 </url>
-<url>
-  <loc>http://digitalpebble.com/index.html</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/solutions.html</loc>
-  <lastmod>2012-09-06T16:53:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/references.html</loc>
-  <lastmod>2014-04-16T14:40:10+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/contact.html</loc>
-  <lastmod>2012-12-05T10:59:00+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
+  <url>
+    <loc>https://stormcrawler.apache.org/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/download/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/getting-started/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/faq/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/support/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
 </urlset>
diff --git a/core/src/test/resources/digitalpebble.sitemap.extensions.image.xml 
b/core/src/test/resources/stormcrawler.sitemap.extensions.image.xml
similarity index 65%
rename from core/src/test/resources/digitalpebble.sitemap.extensions.image.xml
rename to core/src/test/resources/stormcrawler.sitemap.extensions.image.xml
index f5dd7bbb..99ecb553 100644
--- a/core/src/test/resources/digitalpebble.sitemap.extensions.image.xml
+++ b/core/src/test/resources/stormcrawler.sitemap.extensions.image.xml
@@ -38,28 +38,34 @@ under the License.
     
<image:license>https://creativecommons.org/licenses/by/4.0/legalcode</image:license>
   </image:image>
 </url>
-<url>
-  <loc>http://digitalpebble.com/index.html</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/solutions.html</loc>
-  <lastmod>2012-09-06T16:53:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/references.html</loc>
-  <lastmod>2014-04-16T14:40:10+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/contact.html</loc>
-  <lastmod>2012-12-05T10:59:00+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
+  <url>
+    <loc>https://stormcrawler.apache.org/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/download/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/getting-started/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/faq/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/support/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
 </urlset>
diff --git a/core/src/test/resources/digitalpebble.sitemap.extensions.links.xml 
b/core/src/test/resources/stormcrawler.sitemap.extensions.links.xml
similarity index 54%
rename from core/src/test/resources/digitalpebble.sitemap.extensions.links.xml
rename to core/src/test/resources/stormcrawler.sitemap.extensions.links.xml
index 41382dce..4d52b284 100644
--- a/core/src/test/resources/digitalpebble.sitemap.extensions.links.xml
+++ b/core/src/test/resources/stormcrawler.sitemap.extensions.links.xml
@@ -24,36 +24,35 @@ under the License.
             http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
         xmlns:xhtml="http://www.w3.org/1999/xhtml">
 <!-- created with Free Online Sitemap Generator www.xml-sitemaps.com -->
-
-<url>
-  <loc>http://digitalpebble.com/</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>1.00</priority>
-  <xhtml:link rel="alternate" hreflang="en" href="http://www.example.com/english/" />
-</url>
-<url>
-  <loc>http://digitalpebble.com/index.html</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/solutions.html</loc>
-  <lastmod>2012-09-06T16:53:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/references.html</loc>
-  <lastmod>2014-04-16T14:40:10+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/contact.html</loc>
-  <lastmod>2012-12-05T10:59:00+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
+  <url>
+    <loc>https://stormcrawler.apache.org/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>1.00</priority>
+    <xhtml:link rel="alternate" hreflang="en" href="http://www.example.com/english/" />
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/download/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/getting-started/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/faq/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/support/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
 </urlset>
diff --git 
a/core/src/test/resources/digitalpebble.sitemap.extensions.mobile.xml 
b/core/src/test/resources/stormcrawler.sitemap.extensions.mobile.xml
similarity index 56%
rename from core/src/test/resources/digitalpebble.sitemap.extensions.mobile.xml
rename to core/src/test/resources/stormcrawler.sitemap.extensions.mobile.xml
index 16351dc7..685e302b 100644
--- a/core/src/test/resources/digitalpebble.sitemap.extensions.mobile.xml
+++ b/core/src/test/resources/stormcrawler.sitemap.extensions.mobile.xml
@@ -24,36 +24,35 @@ under the License.
             http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
       xmlns:mobile="http://www.google.com/schemas/sitemap-mobile/1.0">
 <!-- created with Free Online Sitemap Generator www.xml-sitemaps.com -->
-
-<url>
-  <loc>http://digitalpebble.com/</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>1.00</priority>
-  <mobile:mobile/>
-</url>
-<url>
-  <loc>http://digitalpebble.com/index.html</loc>
-  <lastmod>2012-12-05T10:59:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/solutions.html</loc>
-  <lastmod>2012-09-06T16:53:04+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/references.html</loc>
-  <lastmod>2014-04-16T14:40:10+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
-<url>
-  <loc>http://digitalpebble.com/contact.html</loc>
-  <lastmod>2012-12-05T10:59:00+00:00</lastmod>
-  <changefreq>monthly</changefreq>
-  <priority>0.80</priority>
-</url>
+  <url>
+    <loc>https://stormcrawler.apache.org/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>1.00</priority>
+    <mobile:mobile/>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/download/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/getting-started/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/faq/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/support/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
 </urlset>
diff --git a/core/src/test/resources/stormcrawler.sitemap.extensions.news.xml 
b/core/src/test/resources/stormcrawler.sitemap.extensions.news.xml
new file mode 100644
index 00000000..7723c3c6
--- /dev/null
+++ b/core/src/test/resources/stormcrawler.sitemap.extensions.news.xml
@@ -0,0 +1,70 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<urlset
+      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
+            http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+      xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">
+<!-- created with Free Online Sitemap Generator www.xml-sitemaps.com -->
+  <url>
+    <loc>https://stormcrawler.apache.org/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <changefreq>monthly</changefreq>
+    <priority>1.00</priority>
+    <news:news>
+      <news:publication>
+        <news:name>The Example Times</news:name>
+        <news:language>en</news:language>
+      </news:publication>
+      <news:genres>PressRelease, Blog</news:genres>
+      <news:publication_date>2008-12-23</news:publication_date>
+      <news:title>Companies A, B in Merger Talks</news:title>
+      <news:keywords>business, merger, acquisition, A, B</news:keywords>
+      <news:stock_tickers>NASDAQ:A, NASDAQ:B</news:stock_tickers>
+    </news:news>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/download/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/getting-started/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/faq/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/support/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+
+</urlset>
diff --git a/core/src/test/resources/stormcrawler.sitemap.extensions.video.xml 
b/core/src/test/resources/stormcrawler.sitemap.extensions.video.xml
new file mode 100644
index 00000000..8023bdda
--- /dev/null
+++ b/core/src/test/resources/stormcrawler.sitemap.extensions.video.xml
@@ -0,0 +1,81 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<urlset
+      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
+            http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+      xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
+<!-- created with Free Online Sitemap Generator www.xml-sitemaps.com -->
+
+  <url>
+    <loc>https://stormcrawler.apache.org/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <changefreq>monthly</changefreq>
+    <priority>1.00</priority>
+    <video:video>
+      <video:thumbnail_loc>http://www.example.com/thumbs/123.jpg</video:thumbnail_loc>
+      <video:title>Grilling steaks for summer</video:title>
+      <video:description>Alkis shows you how to get perfectly done steaks every time</video:description>
+      <video:content_loc>http://www.example.com/video123.flv</video:content_loc>
+      <video:player_loc allow_embed="yes" autoplay="ap=1">http://www.example.com/videoplayer.swf?video=123</video:player_loc>
+      <video:duration>600</video:duration>
+      <video:expiration_date>2009-11-05T19:20:30+08:00</video:expiration_date>
+      <video:rating>4.2</video:rating>
+      <video:view_count>12345</video:view_count>
+      <video:publication_date>2007-11-05T19:20:30+08:00</video:publication_date>
+      <video:tag>sample_tag1</video:tag>
+      <video:tag>sample_tag2</video:tag>
+      <video:family_friendly>yes</video:family_friendly>
+      <video:restriction relationship="allow">IE GB US CA</video:restriction>
+      <video:gallery_loc title="Cooking Videos">http://cooking.example.com</video:gallery_loc>
+      <video:price currency="EUR">1.99</video:price>
+      <video:requires_subscription>yes</video:requires_subscription>
+      <video:uploader info="http://www.example.com/users/grillymcgrillerson">GrillyMcGrillerson</video:uploader>
+      <video:live>no</video:live>
+    </video:video>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/download/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/getting-started/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/faq/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/support/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+
+</urlset>
diff --git a/core/src/test/resources/stormcrawler.sitemap.xml 
b/core/src/test/resources/stormcrawler.sitemap.xml
new file mode 100644
index 00000000..7561b576
--- /dev/null
+++ b/core/src/test/resources/stormcrawler.sitemap.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<urlset
+        xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+        xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
+            http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
+  <!-- created with Free Online Sitemap Generator www.xml-sitemaps.com -->
+
+
+  <url>
+    <loc>https://stormcrawler.apache.org/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>1.00</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/download/index.html</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/getting-started/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/faq/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+  <url>
+    <loc>https://stormcrawler.apache.org/support/</loc>
+    <lastmod>2024-10-19T11:21:53+00:00</lastmod>
+    <priority>0.80</priority>
+  </url>
+
+
+</urlset>
\ No newline at end of file
diff --git 
a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java
 
b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java
index ceb976c4..d90c4c69 100644
--- 
a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java
+++ 
b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/DeletionBolt.java
@@ -196,7 +196,7 @@ public class DeletionBolt extends BaseRichBolt
                                     return new 
BulkItemResponseToFailedFlag(bir, failed);
                                 })
                         .collect(
-                                // 
https://github.com/DigitalPebble/storm-crawler/issues/832
+                                // 
https://github.com/apache/incubator-stormcrawler/issues/832
                                 Collectors.groupingBy(
                                         idWithFailedFlagTuple -> 
idWithFailedFlagTuple.id,
                                         Collectors.toUnmodifiableList()));
diff --git 
a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java
 
b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java
index ee553106..183bf15e 100644
--- 
a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java
+++ 
b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/bolt/IndexerBolt.java
@@ -306,7 +306,7 @@ public class IndexerBolt extends AbstractIndexerBolt
                                     return new 
BulkItemResponseToFailedFlag(bir, failed);
                                 })
                         .collect(
-                                // 
https://github.com/DigitalPebble/storm-crawler/issues/832
+                                // 
https://github.com/apache/incubator-stormcrawler/issues/832
                                 Collectors.groupingBy(
                                         idWithFailedFlagTuple -> 
idWithFailedFlagTuple.id,
                                         Collectors.toUnmodifiableList()));
diff --git 
a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java
 
b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java
index 1f8ea55a..a7708db3 100644
--- 
a/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java
+++ 
b/external/opensearch/src/main/java/org/apache/stormcrawler/opensearch/persistence/StatusUpdaterBolt.java
@@ -339,7 +339,7 @@ public class StatusUpdaterBolt extends 
AbstractStatusUpdaterBolt
                                     return new 
BulkItemResponseToFailedFlag(bir, failed);
                                 })
                         .collect(
-                                // 
https://github.com/DigitalPebble/storm-crawler/issues/832
+                                // 
https://github.com/apache/incubator-stormcrawler/issues/832
                                 Collectors.groupingBy(
                                         idWithFailedFlagTuple -> 
idWithFailedFlagTuple.id,
                                         Collectors.toUnmodifiableList()));
diff --git 
a/external/opensearch/src/test/java/org/apache/stormcrawler/opensearch/bolt/IndexerBoltTest.java
 
b/external/opensearch/src/test/java/org/apache/stormcrawler/opensearch/bolt/IndexerBoltTest.java
index 60afe2f2..a53047da 100644
--- 
a/external/opensearch/src/test/java/org/apache/stormcrawler/opensearch/bolt/IndexerBoltTest.java
+++ 
b/external/opensearch/src/test/java/org/apache/stormcrawler/opensearch/bolt/IndexerBoltTest.java
@@ -114,7 +114,7 @@ class IndexerBoltTest extends AbstractOpenSearchTest {
 
     @Test
     @Timeout(value = 2, unit = TimeUnit.MINUTES)
-    // https://github.com/DigitalPebble/storm-crawler/issues/832
+    // https://github.com/apache/incubator-stormcrawler/issues/832
     void simultaneousCanonicals()
             throws ExecutionException, InterruptedException, TimeoutException {
         Metadata m1 = new Metadata();
diff --git 
a/external/opensearch/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java
 
b/external/opensearch/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java
index b6e41f62..6e738b0c 100644
--- 
a/external/opensearch/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java
+++ 
b/external/opensearch/src/test/java/org/apache/stormcrawler/opensearch/bolt/StatusBoltTest.java
@@ -129,7 +129,7 @@ class StatusBoltTest extends AbstractOpenSearchTest {
 
     @Test
     @Timeout(value = 2, unit = TimeUnit.MINUTES)
-    // see https://github.com/DigitalPebble/storm-crawler/issues/885
+    // see https://github.com/apache/incubator-stormcrawler/issues/885
     void checkListKeyFromOpensearch()
             throws IOException, ExecutionException, InterruptedException, 
TimeoutException {
         String url = "https://www.url.net/something";
diff --git 
a/external/tika/src/test/java/org/apache/stormcrawler/tika/ParserBoltTest.java 
b/external/tika/src/test/java/org/apache/stormcrawler/tika/ParserBoltTest.java
index c41c1403..f6196b87 100644
--- 
a/external/tika/src/test/java/org/apache/stormcrawler/tika/ParserBoltTest.java
+++ 
b/external/tika/src/test/java/org/apache/stormcrawler/tika/ParserBoltTest.java
@@ -74,7 +74,7 @@ class ParserBoltTest extends ParsingTester {
     /**
      * Checks that the mimetype whitelists are handled correctly
      *
-     * @see https://github.com/DigitalPebble/storm-crawler/issues/712
+     * @see https://github.com/apache/incubator-stormcrawler/issues/712
      */
     void testMimeTypeWhileList() throws IOException {
         Map conf = new HashMap();
diff --git 
a/external/urlfrontier/src/main/java/org/apache/stormcrawler/urlfrontier/ManagedChannelUtil.java
 
b/external/urlfrontier/src/main/java/org/apache/stormcrawler/urlfrontier/ManagedChannelUtil.java
index 1a7c65c8..360b04a8 100644
--- 
a/external/urlfrontier/src/main/java/org/apache/stormcrawler/urlfrontier/ManagedChannelUtil.java
+++ 
b/external/urlfrontier/src/main/java/org/apache/stormcrawler/urlfrontier/ManagedChannelUtil.java
@@ -27,7 +27,7 @@ import org.slf4j.LoggerFactory;
 
 /*
  * At some point we have to write a mechanism to share the same ManagedChannel 
in the same runtime
- * see: 
https://github.com/DigitalPebble/storm-crawler/pull/982#issuecomment-1175272094
+ * see: 
https://github.com/apache/incubator-stormcrawler/pull/982#issuecomment-1175272094
  */
 final class ManagedChannelUtil {
     private ManagedChannelUtil() {}
diff --git 
a/external/warc/src/main/java/org/apache/stormcrawler/warc/WARCRequestRecordFormat.java
 
b/external/warc/src/main/java/org/apache/stormcrawler/warc/WARCRequestRecordFormat.java
index 7e786dc4..d8c8ec66 100644
--- 
a/external/warc/src/main/java/org/apache/stormcrawler/warc/WARCRequestRecordFormat.java
+++ 
b/external/warc/src/main/java/org/apache/stormcrawler/warc/WARCRequestRecordFormat.java
@@ -74,7 +74,7 @@ public class WARCRequestRecordFormat extends WARCRecordFormat 
{
         /*
          * The request record ID is stored in the metadata so that a WARC
          * response record can later refer to it. Deactivated because of
-         * https://github.com/DigitalPebble/storm-crawler/issues/721
+         * https://github.com/apache/incubator-stormcrawler/issues/721
          */
         // metadata.setValue("_request.warc_record_id_", mainID);
 

Reply via email to