This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch 1597
in repository https://gitbox.apache.org/repos/asf/stormcrawler.git

commit e9d0e404a5a646634af4a49b42e6684a50973782
Author: Richard Zowalla <[email protected]>
AuthorDate: Tue Dec 23 10:45:23 2025 +0100

    First steps in the migration from URL to URI. This leads to some corner
    cases to discuss in the normalizer / the architecture of normalization.
---
 .../apache/stormcrawler/bolt/FeedParserBolt.java   |  3 +-
 .../org/apache/stormcrawler/bolt/FetcherBolt.java  |  7 +-
 .../apache/stormcrawler/bolt/JSoupParserBolt.java  | 16 +++--
 .../stormcrawler/bolt/SimpleFetcherBolt.java       |  5 +-
 .../stormcrawler/bolt/SiteMapParserBolt.java       |  6 +-
 .../stormcrawler/bolt/URLPartitionerBolt.java      |  5 +-
 .../apache/stormcrawler/filtering/URLFilters.java  |  3 +-
 .../filtering/basic/BasicURLNormalizer.java        | 22 ++++--
 .../stormcrawler/filtering/host/HostURLFilter.java |  6 +-
 .../filtering/regex/FastURLFilter.java             |  8 ++-
 .../filtering/robots/RobotsFilter.java             |  6 +-
 .../stormcrawler/indexing/AbstractIndexerBolt.java |  6 +-
 .../apache/stormcrawler/jsoup/LinkParseFilter.java |  8 ++-
 .../apache/stormcrawler/parse/JSoupFilters.java    |  6 +-
 .../apache/stormcrawler/parse/ParseFilters.java    |  6 +-
 .../stormcrawler/parse/filter/LinkParseFilter.java |  8 ++-
 .../stormcrawler/protocol/RobotRulesParser.java    |  4 +-
 .../stormcrawler/protocol/file/FileResponse.java   |  6 +-
 .../stormcrawler/protocol/okhttp/HttpProtocol.java |  6 +-
 .../apache/stormcrawler/util/URLPartitioner.java   |  6 +-
 .../java/org/apache/stormcrawler/util/URLUtil.java | 17 ++---
 .../stormcrawler/filtering/BasicURLFilterTest.java | 14 ++--
 .../filtering/BasicURLNormalizerTest.java          | 84 +++++++++++-----------
 .../stormcrawler/filtering/FastURLFilterTest.java  | 16 +++--
 .../stormcrawler/filtering/HostURLFilterTest.java  | 18 ++---
 .../stormcrawler/filtering/MaxDepthFilterTest.java | 18 ++---
 .../filtering/MetadataFilterFromJsonTest.java      | 38 +++++-----
 .../stormcrawler/filtering/MetadataFilterTest.java | 58 +++++++--------
 .../stormcrawler/filtering/RegexFilterTest.java    | 20 +++---
 .../stormcrawler/util/CookieConverterTest.java     |  6 +-
 .../protocol/selenium/RemoteDriverProtocol.java    |  3 +-
 .../org/apache/stormcrawler/tika/ParserBolt.java   |  6 +-
 .../org/apache/stormcrawler/warc/WARCSpout.java    |  6 +-
 33 files changed, 257 insertions(+), 190 deletions(-)

diff --git 
a/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java
index 3abe00ff..7c451167 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/FeedParserBolt.java
@@ -25,6 +25,7 @@ import com.rometools.rome.feed.synd.SyndEntry;
 import com.rometools.rome.feed.synd.SyndFeed;
 import com.rometools.rome.io.SyndFeedInput;
 import java.io.ByteArrayInputStream;
+import java.net.URI;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
@@ -175,7 +176,7 @@ public class FeedParserBolt extends StatusEmitterBolt {
             feed = input.build(new InputSource(is));
         }
 
-        URL url1 = new URL(url);
+        URL url1 = new URI(url).toURL();
 
         List<SyndEntry> entries = feed.getEntries();
         for (SyndEntry entry : entries) {
diff --git a/core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java
index 295631b1..c07474a2 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/FetcherBolt.java
@@ -22,6 +22,7 @@ import crawlercommons.robots.BaseRobotRules;
 import java.io.File;
 import java.net.InetAddress;
 import java.net.MalformedURLException;
+import java.net.URI;
 import java.net.URL;
 import java.net.UnknownHostException;
 import java.time.Instant;
@@ -529,7 +530,7 @@ public class FetcherBolt extends StatusEmitterBolt {
                 boolean asap = false;
 
                 try {
-                    URL url = new URL(fit.url);
+                    URL url = new URI(fit.url).toURL();
                     Protocol protocol = protocolFactory.getProtocol(url);
 
                     if (protocol == null) {
@@ -982,8 +983,8 @@ public class FetcherBolt extends StatusEmitterBolt {
         URL url;
 
         try {
-            url = new URL(urlString);
-        } catch (MalformedURLException e) {
+            url = new URI(urlString).toURL();
+        } catch (Exception e) {
             LOG.error("{} is a malformed URL", urlString);
 
             Metadata metadata = (Metadata) input.getValueByField("metadata");
diff --git 
a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java
index 933f41bd..9970bb8f 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/JSoupParserBolt.java
@@ -24,6 +24,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.lang.reflect.InvocationTargetException;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
@@ -293,7 +295,7 @@ public class JSoupParserBolt extends StatusEmitterBolt {
             } else {
                 final Elements links = jsoupDoc.select("a[href]");
                 slinks = new HashMap<>(links.size());
-                final URL baseUrl = new URL(url);
+                final URL baseUrl = new URI(url).toURL();
                 for (Element link : links) {
                     // nofollow
                     String[] relkeywords = link.attr("rel").split(" ");
@@ -375,7 +377,7 @@ public class JSoupParserBolt extends StatusEmitterBolt {
 
                     // https://github.com/apache/stormcrawler/issues/954
                     if (allowRedirs() && StringUtils.isNotBlank(redirection)) {
-                        emitOutlink(tuple, new URL(url), redirection, 
metadata);
+                        emitOutlink(tuple, new URI(url).toURL(), redirection, 
metadata);
                     }
 
                     // Mark URL as redirected
@@ -387,8 +389,8 @@ public class JSoupParserBolt extends StatusEmitterBolt {
                     eventCounter.scope("tuple_success").incr();
                     return;
                 }
-            } catch (MalformedURLException e) {
-                LOG.error("MalformedURLException on {}", url);
+            } catch (MalformedURLException | URISyntaxException e) {
+                LOG.error("Exception on {}", url, e);
             }
         }
 
@@ -516,11 +518,11 @@ public class JSoupParserBolt extends StatusEmitterBolt {
 
         URL sourceUrl;
         try {
-            sourceUrl = new URL(url);
-        } catch (MalformedURLException e) {
+            sourceUrl = new URI(url).toURL();
+        } catch (Exception e) {
             // we would have known by now as previous components check whether
             // the URL is valid
-            LOG.error("MalformedURLException on {}", url);
+            LOG.error("Exception on {}", url, e);
             eventCounter.scope("error_invalid_source_url").incrBy(1);
             return new LinkedList<>();
         }
diff --git 
a/core/src/main/java/org/apache/stormcrawler/bolt/SimpleFetcherBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/SimpleFetcherBolt.java
index e5eb16f5..c79d7449 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/SimpleFetcherBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/SimpleFetcherBolt.java
@@ -23,6 +23,7 @@ import crawlercommons.domains.PaidLevelDomain;
 import crawlercommons.robots.BaseRobotRules;
 import java.net.InetAddress;
 import java.net.MalformedURLException;
+import java.net.URI;
 import java.net.URL;
 import java.net.UnknownHostException;
 import java.text.SimpleDateFormat;
@@ -265,8 +266,8 @@ public class SimpleFetcherBolt extends StatusEmitterBolt {
         URL url;
 
         try {
-            url = new URL(urlString);
-        } catch (MalformedURLException e) {
+            url = new URI(urlString).toURL();
+        } catch (Exception e) {
             LOG.error("{} is a malformed URL", urlString);
             // Report to status stream and ack
             metadata.setValue(Constants.STATUS_ERROR_CAUSE, "malformed URL");
diff --git 
a/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java
index 6736b41f..acbebf41 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/SiteMapParserBolt.java
@@ -31,6 +31,8 @@ import crawlercommons.sitemaps.UnknownFormatException;
 import crawlercommons.sitemaps.extension.Extension;
 import crawlercommons.sitemaps.extension.ExtensionMetadata;
 import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
@@ -181,9 +183,9 @@ public class SiteMapParserBolt extends StatusEmitterBolt {
 
     private List<Outlink> parseSiteMap(
             String url, byte[] content, String contentType, Metadata 
parentMetadata)
-            throws UnknownFormatException, IOException {
+            throws UnknownFormatException, IOException, URISyntaxException {
 
-        URL url1 = new URL(url);
+        URL url1 = new URI(url).toURL();
         long start = System.currentTimeMillis();
         AbstractSiteMap siteMap;
         // let the parser guess what the mimetype is
diff --git 
a/core/src/main/java/org/apache/stormcrawler/bolt/URLPartitionerBolt.java 
b/core/src/main/java/org/apache/stormcrawler/bolt/URLPartitionerBolt.java
index 8f44779f..f11f2a8e 100644
--- a/core/src/main/java/org/apache/stormcrawler/bolt/URLPartitionerBolt.java
+++ b/core/src/main/java/org/apache/stormcrawler/bolt/URLPartitionerBolt.java
@@ -20,6 +20,7 @@ package org.apache.stormcrawler.bolt;
 import crawlercommons.domains.PaidLevelDomain;
 import java.net.InetAddress;
 import java.net.MalformedURLException;
+import java.net.URI;
 import java.net.URL;
 import java.util.Collections;
 import java.util.LinkedHashMap;
@@ -82,9 +83,9 @@ public class URLPartitionerBolt extends BaseRichBolt {
         if (partitionKey == null) {
             URL u;
             try {
-                u = new URL(url);
+                u = new URI(url).toURL();
                 host = u.getHost();
-            } catch (MalformedURLException e1) {
+            } catch (Exception e1) {
                 eventCounter.scope("Invalid URL").incrBy(1);
                 LOG.warn("Invalid URL: {}", url);
                 // ack it so that it doesn't get replayed
diff --git 
a/core/src/main/java/org/apache/stormcrawler/filtering/URLFilters.java 
b/core/src/main/java/org/apache/stormcrawler/filtering/URLFilters.java
index 59f36b6f..ce85ec3e 100644
--- a/core/src/main/java/org/apache/stormcrawler/filtering/URLFilters.java
+++ b/core/src/main/java/org/apache/stormcrawler/filtering/URLFilters.java
@@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URI;
 import java.net.URL;
 import java.util.List;
 import java.util.Map;
@@ -185,7 +186,7 @@ public class URLFilters extends URLFilter implements 
JSONResource {
                 for (URLFilter filter : filters.filters) {
                     long start = System.currentTimeMillis();
                     normalizedUrl =
-                            filter.filter(new URL(sourceUrl), new Metadata(), 
normalizedUrl);
+                            filter.filter(new URI(sourceUrl).toURL(), new 
Metadata(), normalizedUrl);
                     long end = System.currentTimeMillis();
                     System.out.println(
                             "\t["
diff --git 
a/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java
 
b/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java
index 9a5692ef..ca029223 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/filtering/basic/BasicURLNormalizer.java
@@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.node.ArrayNode;
 import java.net.IDN;
 import java.net.MalformedURLException;
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
@@ -120,7 +121,7 @@ public class BasicURLNormalizer extends URLFilter {
         }
 
         try {
-            URL theUrl = new URL(urlToFilter);
+            URL theUrl = new URI(urlToFilter).toURL();
             String file = theUrl.getFile();
             String protocol = theUrl.getProtocol();
             String host = theUrl.getHost();
@@ -152,9 +153,18 @@ public class BasicURLNormalizer extends URLFilter {
                 hasChanged = true;
             }
             if (hasChanged) {
-                urlToFilter = new URL(protocol, host, port, file2).toString();
+                URI uri = new URI(
+                        protocol,
+                        null,     // userInfo
+                        host,
+                        port,
+                        file2,    // path
+                        null,     // query
+                        null      // fragment
+                );
+                urlToFilter = uri.toString();
             }
-        } catch (MalformedURLException e) {
+        } catch (MalformedURLException | URISyntaxException e) {
             return null;
         }
 
@@ -223,7 +233,7 @@ public class BasicURLNormalizer extends URLFilter {
         try {
             // Handle illegal characters by making a url first
             // this will clean illegal characters like |
-            final URL url = new URL(urlToFilter);
+            final URL url = new URI(urlToFilter).toURL();
 
             String query = url.getQuery();
             String path = url.getPath();
@@ -287,8 +297,8 @@ public class BasicURLNormalizer extends URLFilter {
                     + ((s = newQueryString) != null ? '?' + s : "")
                     + ((s = url.getRef()) != null ? '#' + s : "");
 
-        } catch (MalformedURLException e) {
-            LOG.warn("Invalid urlToFilter {}. {}", urlToFilter, e);
+        } catch (MalformedURLException | URISyntaxException e) {
+            LOG.warn("Invalid urlToFilter {}.", urlToFilter, e);
             return null;
         }
     }
diff --git 
a/core/src/main/java/org/apache/stormcrawler/filtering/host/HostURLFilter.java 
b/core/src/main/java/org/apache/stormcrawler/filtering/host/HostURLFilter.java
index f7e5055b..ab104a66 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/filtering/host/HostURLFilter.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/filtering/host/HostURLFilter.java
@@ -20,6 +20,8 @@ package org.apache.stormcrawler.filtering.host;
 import com.fasterxml.jackson.databind.JsonNode;
 import crawlercommons.domains.PaidLevelDomain;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.Map;
 import org.apache.stormcrawler.Metadata;
@@ -82,8 +84,8 @@ public class HostURLFilter extends URLFilter {
 
         URL url;
         try {
-            url = new URL(urlToFilter);
-        } catch (MalformedURLException e1) {
+            url = new URI(urlToFilter).toURL();
+        } catch (MalformedURLException | URISyntaxException e1) {
             return null;
         }
 
diff --git 
a/core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java 
b/core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java
index 854b464a..9f2c9b3a 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/filtering/regex/FastURLFilter.java
@@ -24,6 +24,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -177,7 +179,7 @@ public class FastURLFilter extends URLFilter implements 
JSONResource {
             if (rules.filter(urlToFilter, sourceMetadata)) {
                 return null;
             }
-        } catch (MalformedURLException e) {
+        } catch (MalformedURLException | URISyntaxException e) {
             return null;
         }
         return urlToFilter;
@@ -210,8 +212,8 @@ class Rules {
      *
      * @throws MalformedURLException
      */
-    public boolean filter(String url, Metadata metadata) throws 
MalformedURLException {
-        URL u = new URL(url);
+    public boolean filter(String url, Metadata metadata) throws 
MalformedURLException, URISyntaxException {
+        URL u = new URI(url).toURL();
 
         // first try the full hostname
         String hostname = u.getHost();
diff --git 
a/core/src/main/java/org/apache/stormcrawler/filtering/robots/RobotsFilter.java 
b/core/src/main/java/org/apache/stormcrawler/filtering/robots/RobotsFilter.java
index 07140c2f..42e775db 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/filtering/robots/RobotsFilter.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/filtering/robots/RobotsFilter.java
@@ -20,6 +20,8 @@ package org.apache.stormcrawler.filtering.robots;
 import com.fasterxml.jackson.databind.JsonNode;
 import crawlercommons.robots.BaseRobotRules;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.Map;
 import org.apache.storm.Config;
@@ -61,8 +63,8 @@ public class RobotsFilter extends URLFilter {
             @NotNull String urlToFilter) {
         URL target;
         try {
-            target = new URL(urlToFilter);
-        } catch (MalformedURLException e) {
+            target = new URI(urlToFilter).toURL();
+        } catch (MalformedURLException | URISyntaxException e) {
             return null;
         }
 
diff --git 
a/core/src/main/java/org/apache/stormcrawler/indexing/AbstractIndexerBolt.java 
b/core/src/main/java/org/apache/stormcrawler/indexing/AbstractIndexerBolt.java
index 91175846..4ece0b75 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/indexing/AbstractIndexerBolt.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/indexing/AbstractIndexerBolt.java
@@ -19,6 +19,8 @@ package org.apache.stormcrawler.indexing;
 
 import crawlercommons.domains.PaidLevelDomain;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -286,7 +288,7 @@ public abstract class AbstractIndexerBolt extends 
BaseRichBolt {
         }
 
         try {
-            URL url1 = new URL(url);
+            URL url1 = new URI(url).toURL();
             URL canonical = URLUtil.resolveUrl(url1, canonicalValue);
 
             String domain = PaidLevelDomain.getPLD(url1.getHost());
@@ -298,7 +300,7 @@ public abstract class AbstractIndexerBolt extends 
BaseRichBolt {
             } else {
                 log.info("Canonical URL references a different domain, 
ignoring in {} ", url);
             }
-        } catch (MalformedURLException e) {
+        } catch (MalformedURLException | URISyntaxException e) {
             log.error("Malformed canonical URL {} was found in {} ", 
canonicalValue, url);
         }
 
diff --git 
a/core/src/main/java/org/apache/stormcrawler/jsoup/LinkParseFilter.java 
b/core/src/main/java/org/apache/stormcrawler/jsoup/LinkParseFilter.java
index 1125c374..87d66751 100644
--- a/core/src/main/java/org/apache/stormcrawler/jsoup/LinkParseFilter.java
+++ b/core/src/main/java/org/apache/stormcrawler/jsoup/LinkParseFilter.java
@@ -19,6 +19,8 @@ package org.apache.stormcrawler.jsoup;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -73,11 +75,11 @@ public class LinkParseFilter extends XPathFilter {
 
         java.net.URL sourceUrl;
         try {
-            sourceUrl = new URL(url);
-        } catch (MalformedURLException e1) {
+            sourceUrl = new URI(url).toURL();
+        } catch (MalformedURLException | URISyntaxException e1) {
             // we would have known by now as previous components check whether
             // the URL is valid
-            LOG.error("MalformedURLException on {}", url);
+            LOG.error("Malformed URL on {}", url);
             return;
         }
 
diff --git a/core/src/main/java/org/apache/stormcrawler/parse/JSoupFilters.java 
b/core/src/main/java/org/apache/stormcrawler/parse/JSoupFilters.java
index 8fd8adc3..9a9ccb36 100644
--- a/core/src/main/java/org/apache/stormcrawler/parse/JSoupFilters.java
+++ b/core/src/main/java/org/apache/stormcrawler/parse/JSoupFilters.java
@@ -21,6 +21,8 @@ import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
@@ -126,7 +128,7 @@ public class JSoupFilters extends AbstractConfigurable 
implements JSoupFilter, J
     }
 
     /** * Used for quick testing + debugging */
-    public static void main(String[] args) throws IOException, ParseException {
+    public static void main(String[] args) throws IOException, ParseException, 
URISyntaxException {
 
         Config conf = new Config();
 
@@ -154,7 +156,7 @@ public class JSoupFilters extends AbstractConfigurable 
implements JSoupFilter, J
 
         String url = cmd.getArgs()[0];
 
-        byte[] content = IOUtils.toByteArray((new URL(url)).openStream());
+        byte[] content = IOUtils.toByteArray((new 
URI(url).toURL()).openStream());
 
         Document doc = Jsoup.parse(new String(content, 
StandardCharsets.UTF_8), url);
 
diff --git a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java 
b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java
index 6852d590..9f2ae827 100644
--- a/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java
+++ b/core/src/main/java/org/apache/stormcrawler/parse/ParseFilters.java
@@ -23,6 +23,8 @@ import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
@@ -159,7 +161,7 @@ public class ParseFilters extends ParseFilter implements 
JSONResource {
      *
      * @since 1.17
      */
-    public static void main(String[] args) throws IOException, ParseException {
+    public static void main(String[] args) throws IOException, ParseException, 
URISyntaxException {
 
         Config conf = new Config();
 
@@ -187,7 +189,7 @@ public class ParseFilters extends ParseFilter implements 
JSONResource {
 
         String url = cmd.getArgs()[0];
 
-        byte[] content = IOUtils.toByteArray((new URL(url)).openStream());
+        byte[] content = IOUtils.toByteArray((new 
URI(url).toURL()).openStream());
 
         Document doc = Jsoup.parse(new String(content, 
StandardCharsets.UTF_8), url);
 
diff --git 
a/core/src/main/java/org/apache/stormcrawler/parse/filter/LinkParseFilter.java 
b/core/src/main/java/org/apache/stormcrawler/parse/filter/LinkParseFilter.java
index d379e014..eb23c795 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/parse/filter/LinkParseFilter.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/parse/filter/LinkParseFilter.java
@@ -19,6 +19,8 @@ package org.apache.stormcrawler.parse.filter;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -73,11 +75,11 @@ public class LinkParseFilter extends XPathFilter {
 
         java.net.URL sourceUrl;
         try {
-            sourceUrl = new URL(url);
-        } catch (MalformedURLException e1) {
+            sourceUrl = new URI(url).toURL();
+        } catch (MalformedURLException | URISyntaxException e1) {
             // we would have known by now as previous components check whether
             // the URL is valid
-            LOG.error("MalformedURLException on {}", url);
+            LOG.error("Malformed URL on {}", url);
             return;
         }
 
diff --git 
a/core/src/main/java/org/apache/stormcrawler/protocol/RobotRulesParser.java 
b/core/src/main/java/org/apache/stormcrawler/protocol/RobotRulesParser.java
index 63849eec..00198119 100644
--- a/core/src/main/java/org/apache/stormcrawler/protocol/RobotRulesParser.java
+++ b/core/src/main/java/org/apache/stormcrawler/protocol/RobotRulesParser.java
@@ -23,6 +23,8 @@ import crawlercommons.robots.BaseRobotRules;
 import crawlercommons.robots.SimpleRobotRules;
 import crawlercommons.robots.SimpleRobotRules.RobotRulesMode;
 import crawlercommons.robots.SimpleRobotRulesParser;
+
+import java.net.URI;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -200,7 +202,7 @@ public abstract class RobotRulesParser {
     public BaseRobotRules getRobotRulesSet(Protocol protocol, String url) {
         URL u;
         try {
-            u = new URL(url);
+            u = new URI(url).toURL();
         } catch (Exception e) {
             return EMPTY_RULES;
         }
diff --git 
a/core/src/main/java/org/apache/stormcrawler/protocol/file/FileResponse.java 
b/core/src/main/java/org/apache/stormcrawler/protocol/file/FileResponse.java
index e5858f35..88d1de16 100644
--- a/core/src/main/java/org/apache/stormcrawler/protocol/file/FileResponse.java
+++ b/core/src/main/java/org/apache/stormcrawler/protocol/file/FileResponse.java
@@ -21,6 +21,8 @@ import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.net.URLDecoder;
 import java.nio.charset.StandardCharsets;
@@ -45,13 +47,13 @@ public class FileResponse {
     private int statusCode;
     private final Metadata metadata;
 
-    public FileResponse(String u, Metadata md, FileProtocol fileProtocol) 
throws IOException {
+    public FileResponse(String u, Metadata md, FileProtocol fileProtocol) 
throws IOException, URISyntaxException {
 
         metadata = new Metadata();
         content = new byte[0];
         statusCode = HttpStatus.SC_INTERNAL_SERVER_ERROR;
 
-        URL url = new URL(u);
+        URL url = new URI(u).toURL();
 
         if (!url.getPath().equals(url.getFile())) {
             LOG.warn("url.getPath() != url.getFile(): {}.", url);
diff --git 
a/core/src/main/java/org/apache/stormcrawler/protocol/okhttp/HttpProtocol.java 
b/core/src/main/java/org/apache/stormcrawler/protocol/okhttp/HttpProtocol.java
index e8a14eb9..89d99934 100644
--- 
a/core/src/main/java/org/apache/stormcrawler/protocol/okhttp/HttpProtocol.java
+++ 
b/core/src/main/java/org/apache/stormcrawler/protocol/okhttp/HttpProtocol.java
@@ -21,6 +21,8 @@ import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.net.MalformedURLException;
 import java.net.Proxy;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.charset.StandardCharsets;
 import java.security.cert.CertificateException;
@@ -271,11 +273,11 @@ public class HttpProtocol extends AbstractHttpProtocol {
             return;
         }
         try {
-            final List<Cookie> cookies = 
CookieConverter.getCookies(cookieStrings, new URL(url));
+            final List<Cookie> cookies = 
CookieConverter.getCookies(cookieStrings, new URI(url).toURL());
             for (Cookie c : cookies) {
                 rb.addHeader("Cookie", c.getName() + "=" + c.getValue());
             }
-        } catch (MalformedURLException e) { // Bad url , nothing to do
+        } catch (MalformedURLException | URISyntaxException e) { // Bad url , 
nothing to do
         }
     }
 
diff --git 
a/core/src/main/java/org/apache/stormcrawler/util/URLPartitioner.java 
b/core/src/main/java/org/apache/stormcrawler/util/URLPartitioner.java
index 384c3eeb..b8f11ec0 100644
--- a/core/src/main/java/org/apache/stormcrawler/util/URLPartitioner.java
+++ b/core/src/main/java/org/apache/stormcrawler/util/URLPartitioner.java
@@ -20,6 +20,8 @@ package org.apache.stormcrawler.util;
 import crawlercommons.domains.PaidLevelDomain;
 import java.net.InetAddress;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.Map;
 import org.apache.commons.lang3.StringUtils;
@@ -58,9 +60,9 @@ public class URLPartitioner {
         if (partitionKey == null) {
             URL u;
             try {
-                u = new URL(url);
+                u = new URI(url).toURL();
                 host = u.getHost();
-            } catch (MalformedURLException e1) {
+            } catch (MalformedURLException | URISyntaxException e) {
                 LOG.warn("Invalid URL: {}", url);
                 return null;
             }
diff --git a/core/src/main/java/org/apache/stormcrawler/util/URLUtil.java 
b/core/src/main/java/org/apache/stormcrawler/util/URLUtil.java
index fe3c72f2..1350ece0 100644
--- a/core/src/main/java/org/apache/stormcrawler/util/URLUtil.java
+++ b/core/src/main/java/org/apache/stormcrawler/util/URLUtil.java
@@ -20,6 +20,7 @@ package org.apache.stormcrawler.util;
 import java.net.IDN;
 import java.net.MalformedURLException;
 import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.Locale;
 import java.util.regex.Pattern;
@@ -137,8 +138,8 @@ public class URLUtil {
      *
      * @throws MalformedURLException
      */
-    public static String[] getHostSegments(String url) throws 
MalformedURLException {
-        return getHostSegments(new URL(url));
+    public static String[] getHostSegments(String url) throws 
MalformedURLException, URISyntaxException {
+        return getHostSegments(new URI(url).toURL());
     }
 
     /**
@@ -149,8 +150,8 @@ public class URLUtil {
      */
     public static String getHost(String url) {
         try {
-            return new URL(url).getHost().toLowerCase(Locale.ROOT);
-        } catch (MalformedURLException e) {
+            return new URI(url).toURL().getHost().toLowerCase(Locale.ROOT);
+        } catch (MalformedURLException | URISyntaxException e) {
             return null;
         }
     }
@@ -167,16 +168,16 @@ public class URLUtil {
             // get the full url, and replace the query string with and empty
             // string
             url = url.toLowerCase(Locale.ROOT);
-            String queryStr = new URL(url).getQuery();
+            String queryStr = new URI(url).toURL().getQuery();
             return (queryStr != null) ? url.replace("?" + queryStr, "") : url;
-        } catch (MalformedURLException e) {
+        } catch (MalformedURLException | URISyntaxException e) {
             return null;
         }
     }
 
     public static String toASCII(String url) {
         try {
-            URL u = new URL(url);
+            URL u = new URI(url).toURL();
             URI p =
                     new URI(
                             u.getProtocol(),
@@ -195,7 +196,7 @@ public class URLUtil {
 
     public static String toUNICODE(String url) {
         try {
-            URL u = new URL(url);
+            URL u = new URI(url).toURL();
             URI p =
                     new URI(
                             u.getProtocol(),
diff --git a/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLFilterTest.java b/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLFilterTest.java
index 017b60b8..e59f8cc4 100644
--- a/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLFilterTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLFilterTest.java
@@ -19,6 +19,8 @@ package org.apache.stormcrawler.filtering;
 import com.fasterxml.jackson.databind.node.JsonNodeFactory;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
@@ -40,25 +42,25 @@ class BasicURLFilterTest {
     }
 
     @Test
-    void testRepetition() throws MalformedURLException {
+    void testRepetition() throws MalformedURLException, URISyntaxException {
         URLFilter filter = createFilter(-1, 3);
         Metadata metadata = new Metadata();
-        URL targetURL = new URL("http://www.sourcedomain.com/a/a/a/index.html");
+        URL targetURL = new URI("http://www.sourcedomain.com/a/a/a/index.html").toURL();
         String filterResult = filter.filter(targetURL, metadata, targetURL.toExternalForm());
         Assertions.assertNull(filterResult);
-        targetURL = new URL("http://www.sourcedomain.com/a/b/a/index.html");
+        targetURL = new URI("http://www.sourcedomain.com/a/b/a/index.html").toURL();
         filterResult = filter.filter(targetURL, metadata, targetURL.toExternalForm());
         Assertions.assertEquals(targetURL.toExternalForm(), filterResult);
     }
 
     @Test
-    void testLength() throws MalformedURLException {
+    void testLength() throws MalformedURLException, URISyntaxException {
         URLFilter filter = createFilter(32, -1);
         Metadata metadata = new Metadata();
-        URL targetURL = new URL("http://www.sourcedomain.com/a/a/a/index.html");
+        URL targetURL = new URI("http://www.sourcedomain.com/a/a/a/index.html").toURL();
         String filterResult = filter.filter(targetURL, metadata, targetURL.toExternalForm());
         Assertions.assertNull(filterResult);
-        targetURL = new URL("http://www.sourcedomain.com/");
+        targetURL = new URI("http://www.sourcedomain.com/").toURL();
         filterResult = filter.filter(targetURL, metadata, targetURL.toExternalForm());
         Assertions.assertEquals(targetURL.toExternalForm(), filterResult);
     }
diff --git a/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java b/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java
index 65da7630..65676040 100644
--- a/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/filtering/BasicURLNormalizerTest.java
@@ -23,6 +23,8 @@ import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.JsonNodeFactory;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -79,9 +81,9 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testAnchorFilter() throws MalformedURLException {
+    void testAnchorFilter() throws MalformedURLException, URISyntaxException {
         URLFilter allAllowed = createFilter(true, false);
-        URL url = new URL("http://www.sourcedomain.com/#0");
+        URL url = new URI("http://www.sourcedomain.com/#0").toURL();
         Metadata metadata = new Metadata();
         String filterResult = allAllowed.filter(url, metadata, url.toExternalForm());
         String expected = "http://www.sourcedomain.com/";
@@ -89,18 +91,18 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testAnchorFilterFalse() throws MalformedURLException {
+    void testAnchorFilterFalse() throws MalformedURLException, 
URISyntaxException {
         URLFilter allAllowed = createFilter(false, false);
-        URL url = new URL("http://www.sourcedomain.com/#0");
+        URL url = new URI("http://www.sourcedomain.com/#0").toURL();
         Metadata metadata = new Metadata();
         String filterResult = allAllowed.filter(url, metadata, url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
     }
 
     @Test
-    void testRemoveSomeOfManyQueryParams() throws MalformedURLException {
+    void testRemoveSomeOfManyQueryParams() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(queryParamsToFilter);
-        URL testSourceUrl = new URL("http://google.com");
+        URL testSourceUrl = new URI("http://google.com").toURL();
         String testUrl = "http://google.com?keep1=true&a=c&foo=baz&keep2=true";
         String expectedResult = "http://google.com?keep1=true&keep2=true";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
@@ -108,9 +110,9 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testRemoveAllQueryParams() throws MalformedURLException {
+    void testRemoveAllQueryParams() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(queryParamsToFilter);
-        URL testSourceUrl = new URL("http://google.com");
+        URL testSourceUrl = new URI("http://google.com").toURL();
         String testUrl = "http://google.com?a=c&foo=baz";
         String expectedResult = "http://google.com";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
@@ -118,9 +120,9 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testRemoveDupeQueryParams() throws MalformedURLException {
+    void testRemoveDupeQueryParams() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(queryParamsToFilter);
-        URL testSourceUrl = new URL("http://google.com");
+        URL testSourceUrl = new URI("http://google.com").toURL();
         String testUrl = "http://google.com?a=c&foo=baz&foo=bar&test=true";
         String expectedResult = "http://google.com?test=true";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
@@ -128,9 +130,9 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testPipeInUrlAndFilterStillWorks() throws MalformedURLException {
+    void testPipeInUrlAndFilterStillWorks() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(queryParamsToFilter);
-        URL testSourceUrl = new URL("http://google.com");
+        URL testSourceUrl = new URI("http://google.com").toURL();
         String testUrl = "http://google.com?a=c|d&foo=baz&foo=bar&test=true";
         String expectedResult = "http://google.com?test=true";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
@@ -138,9 +140,9 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testBothAnchorAndQueryFilter() throws MalformedURLException {
+    void testBothAnchorAndQueryFilter() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(true, queryParamsToFilter);
-        URL testSourceUrl = new URL("http://google.com");
+        URL testSourceUrl = new URI("http://google.com").toURL();
         String testUrl = "http://google.com?a=c|d&foo=baz&foo=bar&test=true#fragment=ohYeah";
         String expectedResult = "http://google.com?test=true";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
@@ -148,9 +150,9 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testQuerySort() throws MalformedURLException {
+    void testQuerySort() throws MalformedURLException, URISyntaxException {
         URLFilter urlFilter = createFilter(queryParamsToFilter);
-        URL testSourceUrl = new URL("http://google.com");
+        URL testSourceUrl = new URI("http://google.com").toURL();
         String testUrl = "http://google.com?a=c|d&foo=baz&foo=bar&test=true&z=2&d=4";
         String expectedResult = "http://google.com?d=4&test=true&z=2";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
@@ -158,9 +160,9 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testMangledQueryString() throws MalformedURLException {
+    void testMangledQueryString() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(queryParamsToFilter);
-        URL testSourceUrl = new URL("http://google.com");
+        URL testSourceUrl = new URI("http://google.com").toURL();
         String testUrl = "http://google.com&d=4&good=true";
         String expectedResult = "http://google.com?d=4&good=true";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
@@ -168,11 +170,11 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testHashes() throws MalformedURLException {
+    void testHashes() throws MalformedURLException, URISyntaxException {
         ObjectNode filterParams = new ObjectNode(JsonNodeFactory.instance);
         filterParams.put("removeHashes", true);
         URLFilter urlFilter = createFilter(filterParams);
-        URL testSourceUrl = new URL("http://florida-chemical.com");
+        URL testSourceUrl = new URI("http://florida-chemical.com").toURL();
         String in =
                 "http://www.florida-chemical.com/Diacetone-Alcohol-DAA-99.html?xid_0b629=12854b827878df26423d933a5baf86d5";
         String out = "http://www.florida-chemical.com/Diacetone-Alcohol-DAA-99.html";
@@ -186,9 +188,9 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testDontFixMangledQueryString() throws MalformedURLException {
+    void testDontFixMangledQueryString() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(true, false, queryParamsToFilter);
-        URL testSourceUrl = new URL("http://google.com");
+        URL testSourceUrl = new URI("http://google.com").toURL();
         String testUrl = "http://google.com&d=4&good=true";
         String expectedResult = "http://google.com&d=4&good=true";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
@@ -196,14 +198,14 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testFixMangledQueryString() throws MalformedURLException {
+    void testFixMangledQueryString() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(false, true, queryParamsToFilter);
-        URL testSourceUrl = new URL("http://google.com");
+        URL testSourceUrl = new URI("http://google.com").toURL();
         String testUrl = "http://google.com&d=4&good=true";
         String expectedResult = "http://google.com?d=4&good=true";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
         assertEquals(expectedResult, normalizedUrl, "Failed to filter query string");
-        testSourceUrl = new URL("http://dev.com");
+        testSourceUrl = new URI("http://dev.com").toURL();
         testUrl = "http://dev.com/s&utax/NEWSRLSEfy18.pdf";
         normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), testUrl);
         expectedResult = "http://dev.com/s&utax/NEWSRLSEfy18.pdf";
@@ -211,11 +213,11 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testProperURLEncodingWithoutQueryParameter() throws 
MalformedURLException {
+    void testProperURLEncodingWithoutQueryParameter() throws 
MalformedURLException, URISyntaxException {
         URLFilter urlFilter = createFilter(queryParamsToFilter);
         String urlWithEscapedCharacters =
                 "http://www.dillards.com/product/ASICS-Womens-GT2000-3-LiteShow%E2%84%A2-Running-Shoes_301_-1_301_504736989";
-        URL testSourceUrl = new URL(urlWithEscapedCharacters);
+        URL testSourceUrl = new URI(urlWithEscapedCharacters).toURL();
         String testUrl = urlWithEscapedCharacters;
         String expectedResult = urlWithEscapedCharacters;
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), 
testUrl);
@@ -223,11 +225,11 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testProperURLEncodingWithQueryParameters() throws 
MalformedURLException {
+    void testProperURLEncodingWithQueryParameters() throws 
MalformedURLException, URISyntaxException {
         URLFilter urlFilter = createFilter(queryParamsToFilter);
         String urlWithEscapedCharacters =
                 "http://www.dillards.com/product/ASICS-Womens-GT2000-3-LiteShow%E2%84%A2-Running-Shoes_301_-1_301_504736989?how=are&you=doing";
-        URL testSourceUrl = new URL(urlWithEscapedCharacters);
+        URL testSourceUrl = new URI(urlWithEscapedCharacters).toURL();
         String testUrl = urlWithEscapedCharacters;
         String expectedResult = urlWithEscapedCharacters;
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), 
testUrl);
@@ -235,24 +237,24 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testProperURLEncodingWithBackSlash() throws MalformedURLException {
+    void testProperURLEncodingWithBackSlash() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(queryParamsToFilter);
         String urlWithEscapedCharacters =
                 "http://www.voltaix.com/\\SDS\\Silicon\\Trisilane\\Trisilane_SI050_USENG.pdf";
         String expectedResult =
                 "http://www.voltaix.com/%5CSDS%5CSilicon%5CTrisilane%5CTrisilane_SI050_USENG.pdf";
-        URL testSourceUrl = new URL(urlWithEscapedCharacters);
+        URL testSourceUrl = new URI(urlWithEscapedCharacters).toURL();
         String testUrl = urlWithEscapedCharacters;
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), 
testUrl);
         assertEquals(expectedResult, normalizedUrl, "Failed to filter query 
string");
     }
 
     @Test
-    void testInvalidURI() throws MalformedURLException {
+    void testInvalidURI() throws MalformedURLException, URISyntaxException {
         URLFilter urlFilter = createFilter(true, true);
         // this one is now handled by the normaliser
         String nonURI = "http://www.quanjing.com/search.aspx?q=top-651451||1|60|1|2||||&Fr=4";
-        URL testSourceUrl = new URL(nonURI);
+        URL testSourceUrl = new URI(nonURI).toURL();
         String expectedResult =
                 "http://www.quanjing.com/search.aspx?q=top-651451%7C%7C1%7C60%7C1%7C2%7C%7C%7C%7C&Fr=4";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), 
nonURI);
@@ -260,7 +262,7 @@ class BasicURLNormalizerTest {
         // this one is
         nonURI =
                 "http://vins.lemonde.fr?utm_source=LeMonde_partenaire_hp&utm_medium=EMPLACEMENT PARTENAIRE&utm_term=&utm_content=&utm_campaign=LeMonde_partenaire_hp";
-        testSourceUrl = new URL(nonURI);
+        testSourceUrl = new URI(nonURI).toURL();
         expectedResult =
                 "http://vins.lemonde.fr?utm_source=LeMonde_partenaire_hp&utm_medium=EMPLACEMENT%20PARTENAIRE&utm_term=&utm_content=&utm_campaign=LeMonde_partenaire_hp";
         normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), 
nonURI);
@@ -269,16 +271,16 @@ class BasicURLNormalizerTest {
         // 
http://docs.oracle.com/javase/7/docs/api/java/net/URI.html#normalize()
         String nonNormURL =
                 "http://docs.oracle.com/javase/7/docs/api/java/net/../net/./URI.html#normalize()";
-        testSourceUrl = new URL(nonNormURL);
+        testSourceUrl = new URI(nonNormURL).toURL();
         expectedResult = "http://docs.oracle.com/javase/7/docs/api/java/net/URI.html";
         normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), 
nonNormURL);
         assertEquals(expectedResult, normalizedUrl, "Failed to filter query 
string");
     }
 
     @Test
-    void testLowerCasing() throws MalformedURLException {
+    void testLowerCasing() throws MalformedURLException, URISyntaxException {
         URLFilter urlFilter = createFilter(false, false);
-        URL testSourceUrl = new URL("http://blablabla.org/");
+        URL testSourceUrl = new URI("http://blablabla.org/").toURL();
         String inputURL = "HTTP://www.quanjing.com/";
         String expectedResult = inputURL.toLowerCase(Locale.ROOT);
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), 
inputURL);
@@ -291,9 +293,9 @@ class BasicURLNormalizerTest {
 
     // https://github.com/apache/stormcrawler/issues/401
     @Test
-    void testNonStandardPercentEncoding() throws MalformedURLException {
+    void testNonStandardPercentEncoding() throws MalformedURLException, 
URISyntaxException {
         URLFilter urlFilter = createFilter(false, false);
-        URL testSourceUrl = new URL("http://www.hurriyet.com.tr/index/?d=20160328&p=13");
+        URL testSourceUrl = new URI("http://www.hurriyet.com.tr/index/?d=20160328&p=13").toURL();
         String inputURL = "http://www.hurriyet.com.tr/index/?d=20160328&p=13&s=ni%u011fde";
         String expectedURL = "http://www.hurriyet.com.tr/index/?d=20160328&p=13&s=ni%C4%9Fde";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), 
inputURL);
@@ -301,11 +303,11 @@ class BasicURLNormalizerTest {
     }
 
     @Test
-    void testHostIDNtoASCII() throws MalformedURLException {
+    void testHostIDNtoASCII() throws MalformedURLException, URISyntaxException 
{
         ObjectNode filterParams = new ObjectNode(JsonNodeFactory.instance);
         filterParams.put("hostIDNtoASCII", true);
         URLFilter urlFilter = createFilter(filterParams);
-        URL testSourceUrl = new URL("http://www.example.com/");
+        URL testSourceUrl = new URI("http://www.example.com/").toURL();
         String inputURL = "http://señal6.com.ar/";
         String expectedURL = "http://xn--seal6-pta.com.ar/";
         String normalizedUrl = urlFilter.filter(testSourceUrl, new Metadata(), inputURL);
diff --git 
a/core/src/test/java/org/apache/stormcrawler/filtering/FastURLFilterTest.java 
b/core/src/test/java/org/apache/stormcrawler/filtering/FastURLFilterTest.java
index 6ae14691..09bd8c31 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/filtering/FastURLFilterTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/filtering/FastURLFilterTest.java
@@ -19,6 +19,8 @@ package org.apache.stormcrawler.filtering;
 import com.fasterxml.jackson.databind.node.JsonNodeFactory;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
@@ -39,28 +41,28 @@ class FastURLFilterTest {
     }
 
     @Test
-    void testImagesFilter() throws MalformedURLException {
-        URL url = new URL("http://www.somedomain.com/image.jpg");
+    void testImagesFilter() throws MalformedURLException, URISyntaxException {
+        URL url = new URI("http://www.somedomain.com/image.jpg").toURL();
         Metadata metadata = new Metadata();
         String filterResult = createFilter().filter(url, metadata, url.toExternalForm());
         Assertions.assertNull(filterResult);
     }
 
     @Test
-    void testDomainNotAllowed() throws MalformedURLException {
-        URL url = new URL("http://stormcrawler.net/");
+    void testDomainNotAllowed() throws MalformedURLException, URISyntaxException {
+        URL url = new URI("http://stormcrawler.net/").toURL();
         Metadata metadata = new Metadata();
         String filterResult = createFilter().filter(url, metadata, url.toExternalForm());
         Assertions.assertNull(filterResult);
         // allowed
-        url = new URL("http://stormcrawler.net/bla/");
+        url = new URI("http://stormcrawler.net/bla/").toURL();
         filterResult = createFilter().filter(url, metadata, url.toExternalForm());
         Assertions.assertEquals(url.toString(), filterResult);
     }
 
     @Test
-    void testMD() throws MalformedURLException {
-        URL url = new URL("http://somedomain.net/");
+    void testMD() throws MalformedURLException, URISyntaxException {
+        URL url = new URI("http://somedomain.net/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "value");
         String filterResult = createFilter().filter(url, metadata, url.toExternalForm());
diff --git 
a/core/src/test/java/org/apache/stormcrawler/filtering/HostURLFilterTest.java 
b/core/src/test/java/org/apache/stormcrawler/filtering/HostURLFilterTest.java
index 5b183919..59b81c64 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/filtering/HostURLFilterTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/filtering/HostURLFilterTest.java
@@ -19,6 +19,8 @@ package org.apache.stormcrawler.filtering;
 import com.fasterxml.jackson.databind.node.JsonNodeFactory;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
@@ -44,9 +46,9 @@ class HostURLFilterTest {
     }
 
     @Test
-    void testAllAllowed() throws MalformedURLException {
+    void testAllAllowed() throws MalformedURLException, URISyntaxException {
         HostURLFilter allAllowed = createFilter(false, false);
-        URL sourceURL = new URL("http://www.sourcedomain.com/index.html");
+        URL sourceURL = new URI("http://www.sourcedomain.com/index.html").toURL();
         Metadata metadata = new Metadata();
         String filterResult =
                 allAllowed.filter(sourceURL, metadata, "http://www.sourcedomain.com/index.html");
@@ -60,9 +62,9 @@ class HostURLFilterTest {
     }
 
     @Test
-    void testAllForbidden() throws MalformedURLException {
+    void testAllForbidden() throws MalformedURLException, URISyntaxException {
         HostURLFilter allAllowed = createFilter(true, true);
-        URL sourceURL = new URL("http://www.sourcedomain.com/index.html");
+        URL sourceURL = new URI("http://www.sourcedomain.com/index.html").toURL();
         Metadata metadata = new Metadata();
         String filterResult =
                 allAllowed.filter(sourceURL, metadata, "http://www.sourcedomain.com/index.html");
@@ -76,9 +78,9 @@ class HostURLFilterTest {
     }
 
     @Test
-    void testWithinHostOnly() throws MalformedURLException {
+    void testWithinHostOnly() throws MalformedURLException, URISyntaxException {
         HostURLFilter allAllowed = createFilter(true, false);
-        URL sourceURL = new URL("http://www.sourcedomain.com/index.html");
+        URL sourceURL = new URI("http://www.sourcedomain.com/index.html").toURL();
         Metadata metadata = new Metadata();
         String filterResult =
                 allAllowed.filter(sourceURL, metadata, "http://www.sourcedomain.com/index.html");
@@ -92,9 +94,9 @@ class HostURLFilterTest {
     }
 
     @Test
-    void testWithinDomain() throws MalformedURLException {
+    void testWithinDomain() throws MalformedURLException, URISyntaxException {
         HostURLFilter allAllowed = createFilter(false, true);
-        URL sourceURL = new URL("http://www.sourcedomain.com/index.html");
+        URL sourceURL = new URI("http://www.sourcedomain.com/index.html").toURL();
         Metadata metadata = new Metadata();
         String filterResult =
                 allAllowed.filter(sourceURL, metadata, "http://www.sourcedomain.com/index.html");
diff --git 
a/core/src/test/java/org/apache/stormcrawler/filtering/MaxDepthFilterTest.java 
b/core/src/test/java/org/apache/stormcrawler/filtering/MaxDepthFilterTest.java
index a76a5c74..24fa6563 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/filtering/MaxDepthFilterTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/filtering/MaxDepthFilterTest.java
@@ -19,6 +19,8 @@ package org.apache.stormcrawler.filtering;
 import com.fasterxml.jackson.databind.node.JsonNodeFactory;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
@@ -40,18 +42,18 @@ class MaxDepthFilterTest {
     }
 
     @Test
-    void testDepthZero() throws MalformedURLException {
+    void testDepthZero() throws MalformedURLException, URISyntaxException {
         URLFilter filter = createFilter("maxDepth", 0);
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         String filterResult = filter.filter(url, metadata, url.toExternalForm());
         Assertions.assertNull(filterResult);
     }
 
     @Test
-    void testDepth() throws MalformedURLException {
+    void testDepth() throws MalformedURLException, URISyntaxException {
         URLFilter filter = createFilter("maxDepth", 2);
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.setValue(MetadataTransfer.depthKeyName, "2");
         String filterResult = filter.filter(url, metadata, url.toExternalForm());
@@ -59,9 +61,9 @@ class MaxDepthFilterTest {
     }
 
     @Test
-    void testCustomDepthZero() throws MalformedURLException {
+    void testCustomDepthZero() throws MalformedURLException, URISyntaxException {
         URLFilter filter = createFilter("maxDepth", 3);
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.setValue(MetadataTransfer.maxDepthKeyName, "0");
         String filterResult = filter.filter(url, metadata, url.toExternalForm());
@@ -69,9 +71,9 @@ class MaxDepthFilterTest {
     }
 
     @Test
-    void testCustomDepth() throws MalformedURLException {
+    void testCustomDepth() throws MalformedURLException, URISyntaxException {
         URLFilter filter = createFilter("maxDepth", 1);
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.setValue(MetadataTransfer.maxDepthKeyName, "2");
         metadata.setValue(MetadataTransfer.depthKeyName, "1");
diff --git 
a/core/src/test/java/org/apache/stormcrawler/filtering/MetadataFilterFromJsonTest.java
 
b/core/src/test/java/org/apache/stormcrawler/filtering/MetadataFilterFromJsonTest.java
index 85a624b9..14ae326b 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/filtering/MetadataFilterFromJsonTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/filtering/MetadataFilterFromJsonTest.java
@@ -17,6 +17,8 @@
 package org.apache.stormcrawler.filtering;
 
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.Map;
 import org.apache.stormcrawler.Metadata;
@@ -31,18 +33,18 @@ class MetadataFilterFromJsonTest {
 
     // old filter mechanism (backward compatible)
     @Test
-    void testFilterNoMD() throws MalformedURLException {
+    void testFilterNoMD() throws MalformedURLException, URISyntaxException  {
         URLFilters filter = 
createURLFilters("test.metadata.1.urlfilters.json");
-        URL url = new URL("http://www.sourcedomain.com/";);
+        URL url = new URI("http://www.sourcedomain.com/";).toURL();
         Metadata metadata = new Metadata();
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
     }
 
     @Test
-    void testFilterHit() throws MalformedURLException {
+    void testFilterHit() throws MalformedURLException, URISyntaxException  {
         URLFilters filter = 
createURLFilters("test.metadata.1.urlfilters.json");
-        URL url = new URL("http://www.sourcedomain.com/";);
+        URL url = new URI("http://www.sourcedomain.com/";).toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
@@ -50,9 +52,9 @@ class MetadataFilterFromJsonTest {
     }
 
     @Test
-    void testFilterNoHit() throws MalformedURLException {
+    void testFilterNoHit() throws MalformedURLException, URISyntaxException  {
         URLFilters filter = 
createURLFilters("test.metadata.1.urlfilters.json");
-        URL url = new URL("http://www.sourcedomain.com/";);
+        URL url = new URI("http://www.sourcedomain.com/";).toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val2");
         metadata.addValue("key", "val3");
@@ -62,36 +64,36 @@ class MetadataFilterFromJsonTest {
 
     // new filter mechanism
     @Test
-    void testNewFilterWithEmptyFilterAndNullMetadata() throws 
MalformedURLException {
+    void testNewFilterWithEmptyFilterAndNullMetadata() throws 
MalformedURLException, URISyntaxException  {
         URLFilters filter = 
createURLFilters("test.metadata.2.urlfilters.json");
-        URL url = new URL("http://www.sourcedomain.com/";);
+        URL url = new URI("http://www.sourcedomain.com/";).toURL();
         String filterResult = filter.filter(url, null, url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
     }
 
     @Test
-    void testNewFilterWithEmptyFilterAndEmptyMetadata() throws 
MalformedURLException {
+    void testNewFilterWithEmptyFilterAndEmptyMetadata() throws 
MalformedURLException, URISyntaxException  {
         URLFilters filter = 
createURLFilters("test.metadata.2.urlfilters.json");
-        URL url = new URL("http://www.sourcedomain.com/";);
+        URL url = new URI("http://www.sourcedomain.com/";).toURL();
         Metadata metadata = new Metadata();
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
     }
 
     @Test
-    void testNewFilterWithEmptyMetadata() throws MalformedURLException {
+    void testNewFilterWithEmptyMetadata() throws MalformedURLException, 
URISyntaxException  {
         URLFilters filter = 
createURLFilters("test.metadata.2.urlfilters.json");
-        URL url = new URL("http://www.sourcedomain.com/";);
+        URL url = new URI("http://www.sourcedomain.com/";).toURL();
         Metadata metadata = new Metadata();
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
     }
 
     @Test
-    void testNewFilterWithOnlyOneMatchingANDFilter() throws 
MalformedURLException {
+    void testNewFilterWithOnlyOneMatchingANDFilter() throws 
MalformedURLException, URISyntaxException  {
         // Filter if key=>val AND key2=>val2 match
         URLFilters filter = 
createURLFilters("test.metadata.2.urlfilters.json");
-        URL url = new URL("http://www.sourcedomain.com/";);
+        URL url = new URI("http://www.sourcedomain.com/";).toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
@@ -99,10 +101,10 @@ class MetadataFilterFromJsonTest {
     }
 
     @Test
-    void testNewFilterWithAllMatchingANDFilter() throws MalformedURLException {
+    void testNewFilterWithAllMatchingANDFilter() throws MalformedURLException, 
URISyntaxException {
         // Filter if key=>val AND key2=>val2 match
         URLFilters filter = 
createURLFilters("test.metadata.2.urlfilters.json");
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         metadata.addValue("key2", "val2");
@@ -111,10 +113,10 @@ class MetadataFilterFromJsonTest {
     }
 
     @Test
-    void testNewFilterWithComplexFilter() throws MalformedURLException {
+    void testNewFilterWithComplexFilter() throws MalformedURLException, 
URISyntaxException  {
         // Filter if key=>val AND (key2=>val2 OR key3=>val3) match
         URLFilters filter = 
createURLFilters("test.metadata.3.urlfilters.json");
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
 
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
diff --git 
a/core/src/test/java/org/apache/stormcrawler/filtering/MetadataFilterTest.java 
b/core/src/test/java/org/apache/stormcrawler/filtering/MetadataFilterTest.java
index 6a39568b..1a84e9ef 100644
--- 
a/core/src/test/java/org/apache/stormcrawler/filtering/MetadataFilterTest.java
+++ 
b/core/src/test/java/org/apache/stormcrawler/filtering/MetadataFilterTest.java
@@ -19,6 +19,8 @@ package org.apache.stormcrawler.filtering;
 import com.fasterxml.jackson.databind.node.JsonNodeFactory;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
@@ -40,18 +42,18 @@ class MetadataFilterTest {
 
     // old filter mechanism (backward compatible)
     @Test
-    void testFilterNoMD() throws MalformedURLException {
+    void testFilterNoMD() throws MalformedURLException, URISyntaxException  {
         URLFilter filter = createFilter("key", "val");
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
     }
 
     @Test
-    void testFilterHit() throws MalformedURLException {
+    void testFilterHit() throws MalformedURLException, URISyntaxException  {
         URLFilter filter = createFilter("key", "val");
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
@@ -59,9 +61,9 @@ class MetadataFilterTest {
     }
 
     @Test
-    void testFilterNoHit() throws MalformedURLException {
+    void testFilterNoHit() throws MalformedURLException, URISyntaxException  {
         URLFilter filter = createFilter("key", "val");
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val2");
         metadata.addValue("key", "val3");
@@ -71,26 +73,26 @@ class MetadataFilterTest {
 
     // new filter mechanism
     @Test
-    void testNewFilterWithEmptyFilterAndNullMetadata() throws 
MalformedURLException {
+    void testNewFilterWithEmptyFilterAndNullMetadata() throws 
MalformedURLException, URISyntaxException  {
         MetadataFilter filter = new MetadataFilter();
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         String filterResult = filter.filter(url, null, url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
     }
 
     @Test
-    void testNewFilterWithEmptyFilterAndEmptyMetadata() throws 
MalformedURLException {
+    void testNewFilterWithEmptyFilterAndEmptyMetadata() throws 
MalformedURLException, URISyntaxException  {
         MetadataFilter filter = new MetadataFilter();
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
     }
 
     @Test
-    void testNewFilterWithEmptyFilter() throws MalformedURLException {
+    void testNewFilterWithEmptyFilter() throws MalformedURLException, 
URISyntaxException  {
         MetadataFilter filter = new MetadataFilter();
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
@@ -98,21 +100,21 @@ class MetadataFilterTest {
     }
 
     @Test
-    void testNewFilterWithEmptyMetadata() throws MalformedURLException {
+    void testNewFilterWithEmptyMetadata() throws MalformedURLException, 
URISyntaxException  {
         MetadataFilter filter = new MetadataFilter();
         filter.addFilter("key", "val");
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
     }
 
     @Test
-    void testNewFilterWithSingleMatchingORFilter() throws 
MalformedURLException {
+    void testNewFilterWithSingleMatchingORFilter() throws 
MalformedURLException, URISyntaxException  {
         // Filter if key=>val match (OR operation)
         MetadataFilter filter = new MetadataFilter();
         filter.addFilter("key", "val");
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
@@ -120,12 +122,12 @@ class MetadataFilterTest {
     }
 
     @Test
-    void testNewFilterWithSingleMatchingANDFilter() throws 
MalformedURLException {
+    void testNewFilterWithSingleMatchingANDFilter() throws 
MalformedURLException, URISyntaxException  {
         // Filter if key=>val match (AND operation)
         MetadataFilter filter = new MetadataFilter();
         filter.addFilter("key", "val");
         filter.setOperation(MetadataFilter.FilterOperation.AND);
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
@@ -133,12 +135,12 @@ class MetadataFilterTest {
     }
 
     @Test
-    void testNewFilterWithOnlyOneMatchingORFilter() throws 
MalformedURLException {
+    void testNewFilterWithOnlyOneMatchingORFilter() throws 
MalformedURLException, URISyntaxException  {
         // Filter if key=>val OR key2=>val2 match
         MetadataFilter filter = new MetadataFilter();
         filter.addFilter("key", "val");
         filter.addFilter("key2", "val2");
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
@@ -146,13 +148,13 @@ class MetadataFilterTest {
     }
 
     @Test
-    void testNewFilterWithOnlyOneMatchingANDFilter() throws 
MalformedURLException {
+    void testNewFilterWithOnlyOneMatchingANDFilter() throws 
MalformedURLException, URISyntaxException  {
         // Filter if key=>val AND key2=>val2 match
         MetadataFilter filter = new MetadataFilter();
         filter.addFilter("key", "val");
         filter.addFilter("key2", "val2");
         filter.setOperation(MetadataFilter.FilterOperation.AND);
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         String filterResult = filter.filter(url, metadata, 
url.toExternalForm());
@@ -160,13 +162,13 @@ class MetadataFilterTest {
     }
 
     @Test
-    void testNewFilterWithAllMatchingANDFilter() throws MalformedURLException {
+    void testNewFilterWithAllMatchingANDFilter() throws MalformedURLException, 
URISyntaxException  {
         // Filter if key=>val AND key2=>val2 match
         MetadataFilter filter = new MetadataFilter();
         filter.addFilter("key", "val");
         filter.addFilter("key2", "val2");
         filter.setOperation(MetadataFilter.FilterOperation.AND);
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
         metadata.addValue("key2", "val2");
@@ -175,7 +177,7 @@ class MetadataFilterTest {
     }
 
     @Test
-    void testNewFilterWithComplexFilter() throws MalformedURLException {
+    void testNewFilterWithComplexFilter() throws MalformedURLException, 
URISyntaxException  {
         // Filter if key=>val AND (key2=>val2 OR key3=>val3) match
         MetadataFilter filter = new MetadataFilter();
         filter.addFilter("key", "val");
@@ -184,7 +186,7 @@ class MetadataFilterTest {
         filter2.addFilter("key2", "val2");
         filter2.addFilter("key3", "val3");
         filter.addFilter(filter2);
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
 
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
@@ -221,7 +223,7 @@ class MetadataFilterTest {
     }
 
     @Test
-    void testNewFilterWithOtherComplexFilter() throws MalformedURLException {
+    void testNewFilterWithOtherComplexFilter() throws MalformedURLException, 
URISyntaxException {
         // Filter if key=>val OR (key2=>val2 AND key3=>val3) match
         MetadataFilter filter = new MetadataFilter();
         filter.addFilter("key", "val");
@@ -230,7 +232,7 @@ class MetadataFilterTest {
         filter2.addFilter("key3", "val3");
         filter2.setOperation(MetadataFilter.FilterOperation.AND);
         filter.addFilter(filter2);
-        URL url = new URL("http://www.sourcedomain.com/");
+        URL url = new URI("http://www.sourcedomain.com/").toURL();
 
         Metadata metadata = new Metadata();
         metadata.addValue("key", "val");
diff --git 
a/core/src/test/java/org/apache/stormcrawler/filtering/RegexFilterTest.java 
b/core/src/test/java/org/apache/stormcrawler/filtering/RegexFilterTest.java
index 60482433..183495ae 100644
--- a/core/src/test/java/org/apache/stormcrawler/filtering/RegexFilterTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/filtering/RegexFilterTest.java
@@ -19,6 +19,8 @@ package org.apache.stormcrawler.filtering;
 import com.fasterxml.jackson.databind.node.JsonNodeFactory;
 import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
@@ -43,35 +45,35 @@ class RegexFilterTest {
     }
 
     @Test
-    void testProtocolFilter() throws MalformedURLException {
+    void testProtocolFilter() throws MalformedURLException, URISyntaxException 
{
         URLFilter allAllowed = createFilter();
-        URL url = new URL("ftp://www.someFTP.com/#0");
+        URL url = new URI("ftp://www.someFTP.com/#0").toURL();
         Metadata metadata = new Metadata();
         String filterResult = allAllowed.filter(url, metadata, 
url.toExternalForm());
         Assertions.assertNull(filterResult);
     }
 
     @Test
-    void testImagesFilter() throws MalformedURLException {
+    void testImagesFilter() throws MalformedURLException, URISyntaxException {
         URLFilter allAllowed = createFilter();
-        URL url = new URL("http://www.someFTP.com/bla.gif");
+        URL url = new URI("http://www.someFTP.com/bla.gif").toURL();
         Metadata metadata = new Metadata();
         String filterResult = allAllowed.filter(url, metadata, 
url.toExternalForm());
         Assertions.assertNull(filterResult);
-        url = new URL("http://www.someFTP.com/bla.GIF");
+        url = new URI("http://www.someFTP.com/bla.GIF").toURL();
         filterResult = allAllowed.filter(url, metadata, url.toExternalForm());
         Assertions.assertNull(filterResult);
-        url = new URL("http://www.someFTP.com/bla.GIF&somearg=0");
+        url = new URI("http://www.someFTP.com/bla.GIF&somearg=0").toURL();
         filterResult = allAllowed.filter(url, metadata, url.toExternalForm());
         Assertions.assertNull(filterResult);
-        url = new URL("http://www.someFTP.com/bla.GIF?somearg=0");
+        url = new URI("http://www.someFTP.com/bla.GIF?somearg=0").toURL();
         filterResult = allAllowed.filter(url, metadata, url.toExternalForm());
         Assertions.assertNull(filterResult);
         // not this one : the gif is within the path
-        url = new URL("http://www.someFTP.com/bla.GIF.orNot");
+        url = new URI("http://www.someFTP.com/bla.GIF.orNot").toURL();
         filterResult = allAllowed.filter(url, metadata, url.toExternalForm());
         Assertions.assertEquals(url.toExternalForm(), filterResult);
-        url = new URL("http://www.someFTP.com/bla.mp4");
+        url = new URI("http://www.someFTP.com/bla.mp4").toURL();
         filterResult = allAllowed.filter(url, metadata, url.toExternalForm());
         Assertions.assertNull(filterResult);
     }
diff --git 
a/core/src/test/java/org/apache/stormcrawler/util/CookieConverterTest.java 
b/core/src/test/java/org/apache/stormcrawler/util/CookieConverterTest.java
index 1b9cca8d..a434a825 100644
--- a/core/src/test/java/org/apache/stormcrawler/util/CookieConverterTest.java
+++ b/core/src/test/java/org/apache/stormcrawler/util/CookieConverterTest.java
@@ -17,6 +17,8 @@
 package org.apache.stormcrawler.util;
 
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.List;
 import org.apache.http.cookie.Cookie;
@@ -307,8 +309,8 @@ class CookieConverterTest {
 
     private URL getUrl(String urlString) {
         try {
-            return new URL(urlString);
-        } catch (MalformedURLException e) {
+            return new URI(urlString).toURL();
+        } catch (MalformedURLException | URISyntaxException e) {
             return null;
         }
     }
diff --git 
a/external/selenium/src/main/java/org/apache/stormcrawler/protocol/selenium/RemoteDriverProtocol.java
 
b/external/selenium/src/main/java/org/apache/stormcrawler/protocol/selenium/RemoteDriverProtocol.java
index 219f4b0a..35ef66b1 100644
--- 
a/external/selenium/src/main/java/org/apache/stormcrawler/protocol/selenium/RemoteDriverProtocol.java
+++ 
b/external/selenium/src/main/java/org/apache/stormcrawler/protocol/selenium/RemoteDriverProtocol.java
@@ -16,6 +16,7 @@
  */
 package org.apache.stormcrawler.protocol.selenium;
 
+import java.net.URI;
 import java.net.URL;
 import java.time.Duration;
 import java.util.ArrayList;
@@ -98,7 +99,7 @@ public class RemoteDriverProtocol extends SeleniumProtocol {
         for (String cdaddress : addresses) {
             try {
                 RemoteWebDriver driver =
-                        new RemoteWebDriver(new URL(cdaddress), capabilities, 
tracing);
+                        new RemoteWebDriver(new URI(cdaddress).toURL(), 
capabilities, tracing);
                 // setting timouts
                 // see 
https://www.browserstack.com/guide/understanding-selenium-timeouts
                 Timeouts touts = driver.manage().timeouts();
diff --git 
a/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java 
b/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java
index 09dea259..7e229848 100644
--- a/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java
+++ b/external/tika/src/main/java/org/apache/stormcrawler/tika/ParserBolt.java
@@ -21,6 +21,8 @@ import static 
org.apache.stormcrawler.Constants.StatusStreamName;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -204,9 +206,9 @@ public class ParserBolt extends BaseRichBolt {
 
         // as well as the filename
         try {
-            URL _url = new URL(url);
+            URL _url = new URI(url).toURL();
             md.set(TikaCoreProperties.RESOURCE_NAME_KEY, _url.getFile());
-        } catch (MalformedURLException e1) {
+        } catch (MalformedURLException | URISyntaxException e1) {
             throw new IllegalStateException("Malformed URL", e1);
         }
 
diff --git 
a/external/warc/src/main/java/org/apache/stormcrawler/warc/WARCSpout.java 
b/external/warc/src/main/java/org/apache/stormcrawler/warc/WARCSpout.java
index fb2e5a1c..df127f58 100644
--- a/external/warc/src/main/java/org/apache/stormcrawler/warc/WARCSpout.java
+++ b/external/warc/src/main/java/org/apache/stormcrawler/warc/WARCSpout.java
@@ -17,6 +17,8 @@
 package org.apache.stormcrawler.warc;
 
 import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.net.URL;
 import java.nio.ByteBuffer;
 import java.nio.channels.Channels;
@@ -145,9 +147,9 @@ public class WARCSpout extends FileSpout {
         }
     }
 
-    private ReadableByteChannel openChannel(String path) throws IOException {
+    private ReadableByteChannel openChannel(String path) throws IOException, 
URISyntaxException {
         if (path.matches("^https?://.*")) {
-            URL warcUrl = new URL(path);
+            URL warcUrl = new URI(path).toURL();
             return Channels.newChannel(warcUrl.openStream());
         }
         org.apache.hadoop.fs.Path hdfsPath = new 
org.apache.hadoop.fs.Path(path);

Reply via email to