This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-stormcrawler.git
The following commit(s) were added to refs/heads/main by this push:
new b46cab38 [MINOR] update URLs in tests (#1435)
b46cab38 is described below
commit b46cab38b0d4344a2805be26ac7157734e9db842
Author: PJ Fanning <[email protected]>
AuthorDate: Tue Dec 3 12:54:34 2024 +0100
[MINOR] update URLs in tests (#1435)
---
.../apache/stormcrawler/bolt/FeedParserBoltTest.java | 2 +-
.../stormcrawler/bolt/JSoupParserBoltTest.java | 12 ++++++------
.../stormcrawler/bolt/SiteMapParserBoltTest.java | 20 ++++++++++----------
.../stormcrawler/indexer/BasicIndexingTest.java | 18 +++++++++---------
.../apache/stormcrawler/json/JsoupFilterTest.java | 4 ++--
.../apache/stormcrawler/jsoup/JSoupFiltersTest.java | 8 ++++----
.../stormcrawler/parse/DuplicateLinksTest.java | 2 +-
.../apache/stormcrawler/parse/StackOverflowTest.java | 4 ++--
.../parse/filter/CSVMetadataFilterTest.java | 2 +-
.../parse/filter/SubDocumentsFilterTest.java | 2 +-
.../stormcrawler/parse/filter/XPathFilterTest.java | 6 +++---
core/src/test/resources/stormcrawler.apache.org.html | 2 +-
.../org/apache/stormcrawler/tika/ParserBoltTest.java | 4 ++--
13 files changed, 43 insertions(+), 43 deletions(-)
diff --git
a/core/src/test/java/org/apache/stormcrawler/bolt/FeedParserBoltTest.java
b/core/src/test/java/org/apache/stormcrawler/bolt/FeedParserBoltTest.java
index 6da0749a..500e2ade 100644
--- a/core/src/test/java/org/apache/stormcrawler/bolt/FeedParserBoltTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/bolt/FeedParserBoltTest.java
@@ -86,7 +86,7 @@ class FeedParserBoltTest extends ParsingTester {
void testNonFeedParsing() throws IOException {
prepareParserBolt("test.parsefilters.json");
// do not specify that it is a feed file
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html", new Metadata());
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html", new Metadata());
Assertions.assertEquals(1, output.getEmitted().size());
}
}
diff --git
a/core/src/test/java/org/apache/stormcrawler/bolt/JSoupParserBoltTest.java
b/core/src/test/java/org/apache/stormcrawler/bolt/JSoupParserBoltTest.java
index f7a6d614..59b4daa3 100644
--- a/core/src/test/java/org/apache/stormcrawler/bolt/JSoupParserBoltTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/bolt/JSoupParserBoltTest.java
@@ -119,7 +119,7 @@ class JSoupParserBoltTest extends ParsingTester {
void testNoScriptInText() throws IOException {
bolt.prepare(
new HashMap(), TestUtil.getMockedTopologyContext(), new
OutputCollector(output));
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
List<Object> parsedTuple = output.getEmitted().remove(0);
// check in the metadata that the values match
String text = (String) parsedTuple.get(3);
@@ -133,7 +133,7 @@ class JSoupParserBoltTest extends ParsingTester {
void testNoFollowOutlinks() throws IOException {
bolt.prepare(
new HashMap(), TestUtil.getMockedTopologyContext(), new
OutputCollector(output));
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
List<List<Object>> statusTuples =
output.getEmitted(Constants.StatusStreamName);
Assertions.assertEquals(25, statusTuples.size());
}
@@ -144,7 +144,7 @@ class JSoupParserBoltTest extends ParsingTester {
new HashMap(), TestUtil.getMockedTopologyContext(), new
OutputCollector(output));
Metadata metadata = new Metadata();
metadata.setValues("X-Robots-Tag", new String[] {"noindex",
"nofollow"});
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html", metadata);
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html", metadata);
List<List<Object>> statusTuples =
output.getEmitted(Constants.StatusStreamName);
// no outlinks at all
Assertions.assertEquals(0, statusTuples.size());
@@ -170,7 +170,7 @@ class JSoupParserBoltTest extends ParsingTester {
new HashMap(), TestUtil.getMockedTopologyContext(), new
OutputCollector(output));
for (int i = 0; i < tests.length; i++) {
byte[] bytes = tests[i].getBytes(StandardCharsets.UTF_8);
- parse("http://stormcrawler.apache.org", bytes, new Metadata());
+ parse("https://stormcrawler.apache.org", bytes, new Metadata());
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> parsedTuple = output.getEmitted().remove(0);
// check in the metadata that the values match
@@ -205,7 +205,7 @@ class JSoupParserBoltTest extends ParsingTester {
void testExecuteWithOutlinksLimit() throws IOException {
stormConf.put("parser.emitOutlinks.max.per.page", 5);
bolt.prepare(stormConf, TestUtil.getMockedTopologyContext(), new
OutputCollector(output));
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
List<List<Object>> statusTuples =
output.getEmitted(Constants.StatusStreamName);
// outlinks being limited by property
Assertions.assertEquals(5, statusTuples.size());
@@ -215,7 +215,7 @@ class JSoupParserBoltTest extends ParsingTester {
void testExecuteWithOutlinksLimitDisabled() throws IOException {
stormConf.put("parser.emitOutlinks.max.per.page", -1);
bolt.prepare(stormConf, TestUtil.getMockedTopologyContext(), new
OutputCollector(output));
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
List<List<Object>> statusTuples =
output.getEmitted(Constants.StatusStreamName);
// outlinks NOT being limited by property, since is disabled with -1
Assertions.assertEquals(25, statusTuples.size());
diff --git
a/core/src/test/java/org/apache/stormcrawler/bolt/SiteMapParserBoltTest.java
b/core/src/test/java/org/apache/stormcrawler/bolt/SiteMapParserBoltTest.java
index 5fb57b56..2b09e190 100644
--- a/core/src/test/java/org/apache/stormcrawler/bolt/SiteMapParserBoltTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/bolt/SiteMapParserBoltTest.java
@@ -52,7 +52,7 @@ class SiteMapParserBoltTest extends ParsingTester {
metadata.setValue(SiteMapParserBolt.isSitemapKey, "true");
// and its mime-type
metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
- parse("http://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.xml", metadata);
+ parse("https://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.xml", metadata);
Assertions.assertEquals(7,
output.getEmitted(Constants.StatusStreamName).size());
// TODO test that the new links have the right metadata
List<Object> fields =
output.getEmitted(Constants.StatusStreamName).get(0);
@@ -68,7 +68,7 @@ class SiteMapParserBoltTest extends ParsingTester {
// and its mime-type
metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
parse(
- "http://stormcrawler.apache.org/sitemap-index.xml",
+ "https://stormcrawler.apache.org/sitemap-index.xml",
"stormcrawler.sitemap.index.xml",
metadata);
for (List<Object> fields :
output.getEmitted(Constants.StatusStreamName)) {
@@ -103,7 +103,7 @@ class SiteMapParserBoltTest extends ParsingTester {
// and its mime-type
metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
parse(
- "http://stormcrawler.apache.org/sitemap.xml",
+ "https://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.extensions.image.xml",
metadata);
Values values = (Values)
output.getEmitted(Constants.StatusStreamName).get(0);
@@ -122,7 +122,7 @@ class SiteMapParserBoltTest extends ParsingTester {
// and its mime-type
metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
parse(
- "http://stormcrawler.apache.org/sitemap.xml",
+ "https://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.extensions.mobile.xml",
metadata);
Values values = (Values)
output.getEmitted(Constants.StatusStreamName).get(0);
@@ -141,7 +141,7 @@ class SiteMapParserBoltTest extends ParsingTester {
// and its mime-type
metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
parse(
- "http://stormcrawler.apache.org/sitemap.xml",
+ "https://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.extensions.links.xml",
metadata);
Values values = (Values)
output.getEmitted(Constants.StatusStreamName).get(0);
@@ -160,7 +160,7 @@ class SiteMapParserBoltTest extends ParsingTester {
// and its mime-type
metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
parse(
- "http://stormcrawler.apache.org/sitemap.xml",
+ "https://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.extensions.news.xml",
metadata);
Values values = (Values)
output.getEmitted(Constants.StatusStreamName).get(0);
@@ -179,7 +179,7 @@ class SiteMapParserBoltTest extends ParsingTester {
// and its mime-type
metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
parse(
- "http://stormcrawler.apache.org/sitemap.xml",
+ "https://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.extensions.video.xml",
metadata);
Values values = (Values)
output.getEmitted(Constants.StatusStreamName).get(0);
@@ -205,7 +205,7 @@ class SiteMapParserBoltTest extends ParsingTester {
// and its mime-type
metadata.setValue(HttpHeaders.CONTENT_TYPE, "application/xml");
parse(
- "http://stormcrawler.apache.org/sitemap.xml",
+ "https://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.extensions.all.xml",
metadata);
Values values = (Values)
output.getEmitted(Constants.StatusStreamName).get(0);
@@ -240,7 +240,7 @@ class SiteMapParserBoltTest extends ParsingTester {
Metadata metadata = new Metadata();
// do not specify that it is a sitemap file
// do not set the mimetype
- parse("http://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.xml", metadata);
+ parse("https://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.xml", metadata);
Assertions.assertEquals(7,
output.getEmitted(Constants.StatusStreamName).size());
// TODO test that the new links have the right metadata
List<Object> fields =
output.getEmitted(Constants.StatusStreamName).get(0);
@@ -251,7 +251,7 @@ class SiteMapParserBoltTest extends ParsingTester {
void testNonSitemapParsing() throws IOException {
prepareParserBolt("test.parsefilters.json");
// do not specify that it is a sitemap file
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html", new Metadata());
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html", new Metadata());
Assertions.assertEquals(1, output.getEmitted().size());
}
diff --git
a/core/src/test/java/org/apache/stormcrawler/indexer/BasicIndexingTest.java
b/core/src/test/java/org/apache/stormcrawler/indexer/BasicIndexingTest.java
index d00bd4a6..3bed3937 100644
--- a/core/src/test/java/org/apache/stormcrawler/indexer/BasicIndexingTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/indexer/BasicIndexingTest.java
@@ -29,7 +29,7 @@ import org.junit.jupiter.api.Test;
class BasicIndexingTest extends IndexerTester {
- private static final String URL = "http://stormcrawler.apache.org";
+ private static final String URL = "https://stormcrawler.apache.org";
@BeforeEach
void setupIndexerBolt() {
@@ -55,12 +55,12 @@ class BasicIndexingTest extends IndexerTester {
config.put(AbstractIndexerBolt.urlFieldParamName, "url");
config.put(AbstractIndexerBolt.canonicalMetadataParamName,
"canonical");
Metadata metadata = new Metadata();
- metadata.setValue("canonical", "http://stormcrawler.apache.org/");
+ metadata.setValue("canonical", "https://stormcrawler.apache.org/");
prepareIndexerBolt(config);
index(URL, metadata);
Map<String, String> fields = ((DummyIndexer) bolt).returnFields();
Assertions.assertEquals(
- "http://stormcrawler.apache.org/",
+ "https://stormcrawler.apache.org/",
fields.get("url"),
"Use the canonical URL if found");
}
@@ -76,7 +76,7 @@ class BasicIndexingTest extends IndexerTester {
index(URL, metadata);
Map<String, String> fields = ((DummyIndexer) bolt).returnFields();
Assertions.assertEquals(
- "http://stormcrawler.apache.org/home",
+ "https://stormcrawler.apache.org/home",
fields.get("url"),
"Use the canonical URL if found");
}
@@ -92,7 +92,7 @@ class BasicIndexingTest extends IndexerTester {
index(URL, metadata);
Map<String, String> fields = ((DummyIndexer) bolt).returnFields();
Assertions.assertEquals(
- "http://stormcrawler.apache.org",
+ "https://stormcrawler.apache.org",
fields.get("url"),
"Use the default URL if a bad canonical URL is found");
}
@@ -108,7 +108,7 @@ class BasicIndexingTest extends IndexerTester {
index(URL, metadata);
Map<String, String> fields = ((DummyIndexer) bolt).returnFields();
Assertions.assertEquals(
- "http://stormcrawler.apache.org",
+ "https://stormcrawler.apache.org",
fields.get("url"),
"Ignore if the canonical URL references other host");
}
@@ -118,12 +118,12 @@ class BasicIndexingTest extends IndexerTester {
Map config = new HashMap();
config.put(AbstractIndexerBolt.urlFieldParamName, "url");
Metadata metadata = new Metadata();
- metadata.setValue("canonical", "http://stormcrawler.apache.org/");
+ metadata.setValue("canonical", "https://stormcrawler.apache.org/");
prepareIndexerBolt(config);
index(URL, metadata);
Map<String, String> fields = ((DummyIndexer) bolt).returnFields();
Assertions.assertEquals(
- "http://stormcrawler.apache.org",
+ "https://stormcrawler.apache.org",
fields.get("url"),
"Use the canonical URL if found");
}
@@ -139,7 +139,7 @@ class BasicIndexingTest extends IndexerTester {
index(URL, metadata);
Map<String, String> fields = ((DummyIndexer) bolt).returnFields();
Assertions.assertEquals(
- "http://stormcrawler.apache.org",
+ "https://stormcrawler.apache.org",
fields.get("url"),
"The document must pass if the key/value is found in the
metadata");
}
diff --git
a/core/src/test/java/org/apache/stormcrawler/json/JsoupFilterTest.java
b/core/src/test/java/org/apache/stormcrawler/json/JsoupFilterTest.java
index 5a56d655..bb77f167 100644
--- a/core/src/test/java/org/apache/stormcrawler/json/JsoupFilterTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/json/JsoupFilterTest.java
@@ -45,7 +45,7 @@ class JsoupFilterTest extends ParsingTester {
@Test
void testLDJsonExtraction() throws IOException {
prepareParserBolt("test.jsoupfilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> parsedTuple = output.getEmitted().get(0);
Metadata metadata = (Metadata) parsedTuple.get(2);
@@ -57,7 +57,7 @@ class JsoupFilterTest extends ParsingTester {
@Test
void testLinkFilter() throws IOException {
prepareParserBolt("test.jsoupfilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
List<List<Object>> status = output.getEmitted("status");
Assertions.assertEquals(31, status.size());
List<Object> parsedTuple = status.get(0);
diff --git
a/core/src/test/java/org/apache/stormcrawler/jsoup/JSoupFiltersTest.java
b/core/src/test/java/org/apache/stormcrawler/jsoup/JSoupFiltersTest.java
index 33f96dbb..ff1199de 100644
--- a/core/src/test/java/org/apache/stormcrawler/jsoup/JSoupFiltersTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/jsoup/JSoupFiltersTest.java
@@ -46,7 +46,7 @@ class JSoupFiltersTest extends ParsingTester {
@Test
void testBasicExtraction() throws IOException {
prepareParserBolt("test.jsoupfilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> parsedTuple = output.getEmitted().get(0);
Metadata metadata = (Metadata) parsedTuple.get(2);
@@ -61,7 +61,7 @@ class JSoupFiltersTest extends ParsingTester {
// https://github.com/apache/incubator-stormcrawler/issues/219
void testScriptExtraction() throws IOException {
prepareParserBolt("test.jsoupfilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> parsedTuple = output.getEmitted().get(0);
Metadata metadata = (Metadata) parsedTuple.get(2);
@@ -77,7 +77,7 @@ class JSoupFiltersTest extends ParsingTester {
@Test
void testLDJsonExtraction() throws IOException {
prepareParserBolt("test.jsoupfilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> parsedTuple = output.getEmitted().get(0);
Metadata metadata = (Metadata) parsedTuple.get(2);
@@ -89,7 +89,7 @@ class JSoupFiltersTest extends ParsingTester {
@Test
void testExtraLink() throws IOException {
prepareParserBolt("test.jsoupfilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
Assertions.assertEquals(31, output.getEmitted("status").size());
}
}
diff --git
a/core/src/test/java/org/apache/stormcrawler/parse/DuplicateLinksTest.java
b/core/src/test/java/org/apache/stormcrawler/parse/DuplicateLinksTest.java
index 28835355..e105c835 100644
--- a/core/src/test/java/org/apache/stormcrawler/parse/DuplicateLinksTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/parse/DuplicateLinksTest.java
@@ -44,7 +44,7 @@ class DuplicateLinksTest extends ParsingTester {
config.put("urlfilters.config.file", "basicurlnormalizer.json");
bolt.prepare(config, TestUtil.getMockedTopologyContext(), new
OutputCollector(output));
Metadata metadata = new Metadata();
- parse("http://stormcrawler.apache.org/duplicates.html",
"duplicateLinks.html", metadata);
+ parse("https://stormcrawler.apache.org/duplicates.html",
"duplicateLinks.html", metadata);
Assertions.assertEquals(1,
output.getEmitted(Constants.StatusStreamName).size());
}
}
diff --git
a/core/src/test/java/org/apache/stormcrawler/parse/StackOverflowTest.java
b/core/src/test/java/org/apache/stormcrawler/parse/StackOverflowTest.java
index ddff97da..eed28321 100644
--- a/core/src/test/java/org/apache/stormcrawler/parse/StackOverflowTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/parse/StackOverflowTest.java
@@ -42,7 +42,7 @@ class StackOverflowTest extends ParsingTester {
void testStackOverflow() throws IOException {
prepareParserBolt("test.parsefilters.json");
Metadata metadata = new Metadata();
- parse("http://stormcrawler.apache.org.com", "stackexception.html",
metadata);
+ parse("https://stormcrawler.apache.org", "stackexception.html",
metadata);
Assertions.assertEquals(31,
output.getEmitted(Constants.StatusStreamName).size());
}
@@ -53,7 +53,7 @@ class StackOverflowTest extends ParsingTester {
void testNamespaceExtraction() throws IOException {
prepareParserBolt("test.parsefilters.json");
Metadata metadata = new Metadata();
- parse("http://stormcrawler.apache.org.com", "stackexception.html",
metadata);
+ parse("https://stormcrawler.apache.org", "stackexception.html",
metadata);
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> obj = output.getEmitted().get(0);
Metadata m = (Metadata) obj.get(2);
diff --git
a/core/src/test/java/org/apache/stormcrawler/parse/filter/CSVMetadataFilterTest.java
b/core/src/test/java/org/apache/stormcrawler/parse/filter/CSVMetadataFilterTest.java
index d460c6a5..e6f3583c 100644
---
a/core/src/test/java/org/apache/stormcrawler/parse/filter/CSVMetadataFilterTest.java
+++
b/core/src/test/java/org/apache/stormcrawler/parse/filter/CSVMetadataFilterTest.java
@@ -36,7 +36,7 @@ class CSVMetadataFilterTest extends ParsingTester {
@Test
void testMultivalued() throws IOException {
prepareParserBolt("test.parsefilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> parsedTuple = output.getEmitted().get(0);
Metadata metadata = (Metadata) parsedTuple.get(2);
diff --git
a/core/src/test/java/org/apache/stormcrawler/parse/filter/SubDocumentsFilterTest.java
b/core/src/test/java/org/apache/stormcrawler/parse/filter/SubDocumentsFilterTest.java
index f74c34a6..efb9f737 100644
---
a/core/src/test/java/org/apache/stormcrawler/parse/filter/SubDocumentsFilterTest.java
+++
b/core/src/test/java/org/apache/stormcrawler/parse/filter/SubDocumentsFilterTest.java
@@ -40,7 +40,7 @@ class SubDocumentsFilterTest extends ParsingTester {
config.put("detect.mimetype", false);
prepareParserBolt("test.subdocfilter.json", config);
Metadata metadata = new Metadata();
- parse("http://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.xml", metadata);
+ parse("https://stormcrawler.apache.org/sitemap.xml",
"stormcrawler.sitemap.xml", metadata);
Assertions.assertEquals(7, output.getEmitted().size());
}
}
diff --git
a/core/src/test/java/org/apache/stormcrawler/parse/filter/XPathFilterTest.java
b/core/src/test/java/org/apache/stormcrawler/parse/filter/XPathFilterTest.java
index 7a8077f3..d31d1d9b 100644
---
a/core/src/test/java/org/apache/stormcrawler/parse/filter/XPathFilterTest.java
+++
b/core/src/test/java/org/apache/stormcrawler/parse/filter/XPathFilterTest.java
@@ -36,7 +36,7 @@ class XPathFilterTest extends ParsingTester {
@Test
void testBasicExtraction() throws IOException {
prepareParserBolt("test.parsefilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> parsedTuple = output.getEmitted().get(0);
Metadata metadata = (Metadata) parsedTuple.get(2);
@@ -51,7 +51,7 @@ class XPathFilterTest extends ParsingTester {
// https://github.com/apache/incubator-stormcrawler/issues/219
void testScriptExtraction() throws IOException {
prepareParserBolt("test.parsefilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> parsedTuple = output.getEmitted().get(0);
Metadata metadata = (Metadata) parsedTuple.get(2);
@@ -67,7 +67,7 @@ class XPathFilterTest extends ParsingTester {
@Test
void testLDJsonExtraction() throws IOException {
prepareParserBolt("test.parsefilters.json");
- parse("http://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
+ parse("https://stormcrawler.apache.org",
"stormcrawler.apache.org.html");
Assertions.assertEquals(1, output.getEmitted().size());
List<Object> parsedTuple = output.getEmitted().get(0);
Metadata metadata = (Metadata) parsedTuple.get(2);
diff --git a/core/src/test/resources/stormcrawler.apache.org.html
b/core/src/test/resources/stormcrawler.apache.org.html
index 9d26cce2..455fa938 100644
--- a/core/src/test/resources/stormcrawler.apache.org.html
+++ b/core/src/test/resources/stormcrawler.apache.org.html
@@ -44,7 +44,7 @@ under the License.
"addressLocality": "Wilmington",
"streetAddress": "1000 N West Street, Suite 1200"
},
- "url": "http://stormcrawler.apache.org/"
+ "url": "https://stormcrawler.apache.org/"
}
</script>
diff --git
a/external/tika/src/test/java/org/apache/stormcrawler/tika/ParserBoltTest.java
b/external/tika/src/test/java/org/apache/stormcrawler/tika/ParserBoltTest.java
index 6f98ad12..c728d1a4 100644
---
a/external/tika/src/test/java/org/apache/stormcrawler/tika/ParserBoltTest.java
+++
b/external/tika/src/test/java/org/apache/stormcrawler/tika/ParserBoltTest.java
@@ -55,7 +55,7 @@ class ParserBoltTest extends ParsingTester {
conf.put("parser.extract.embedded", true);
bolt.prepare(conf, TestUtil.getMockedTopologyContext(), new
OutputCollector(output));
parse(
- "http://stormcrawler.apache.org/test_recursive_embedded.docx",
+ "https://stormcrawler.apache.org/test_recursive_embedded.docx",
"test_recursive_embedded.docx");
List<List<Object>> outTuples = output.getEmitted();
// TODO could we get as many subdocs as embedded in the original one?
@@ -99,7 +99,7 @@ class ParserBoltTest extends ParsingTester {
"http." + HttpHeaders.CONTENT_TYPE,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document");
parse(
- "http://stormcrawler.apache.org/test_recursive_embedded.docx",
+ "https://stormcrawler.apache.org/test_recursive_embedded.docx",
"test_recursive_embedded.docx",
metadata);
outTuples = output.getEmitted();