Hello,
fine by me, since it can be worked around by using the square-bracket-delimited
syntax.
As you already did the "heavy lifting" I amended the test suite extensively (some may
call it excessively), and realized that I could squeeze in a small bugfix for the "undelimited
IPv6 address" case.
At that point in the parsing process, the scheme, credentials and path have
already been split off, which leaves only the `host[:port]` bit. Since
hostnames and IPv4 addresses can not contain colons, these cases can only have
zero or one colon (without and with port respectively). If it starts with a
square bracket, it is a delimited IPv6 address. The least number of colons a
IPv6 address can have is two (e.g. ::). So if the host-port bit does not start
with a square bracket but contains more than one colon, it has to be a
undelimited IPv6 address and is used as-is without splitting off the last bit
as port. Admittedly this is just a heuristic, but it seems to works. There is a
reason why RFC3986 does not allow undelimited IPv6 addresses: In combination
with a port they might be ambiguous.
A diff is attached (apply it with `git apply $FILE`), I will leave it to your
decision if and in what form you include it.
And if I may be so bold: I support using a specialized library for parsing
URIs/URLs/addresses. As we have seen, that is a non-trivial problem to solve.
diff --git a/src/ahttpurl.cc b/src/ahttpurl.cc
index b8ec14a..50f46a8 100644
--- a/src/ahttpurl.cc
+++ b/src/ahttpurl.cc
@@ -6,6 +6,7 @@ namespace acng {
using namespace std;
+// See RFC3986
bool tHttpUrl::SetHttpUrl(cmstring , bool unescape)
{
clear();
@@ -92,6 +93,11 @@ extract_host_check_port:
sHost.erase(0, l+1);
}
+ if (!bCheckBrac && 2 <= count(sHost.begin(), sHost.end(), ':'))
+ {
+ noPort=true;
+ }
+
l=sHost.size();
if (!noPort)
{
diff --git a/tests/src/ut_http.cc b/tests/src/ut_http.cc
index 4569c3e..55c69b8 100644
--- a/tests/src/ut_http.cc
+++ b/tests/src/ut_http.cc
@@ -212,30 +212,324 @@ TEST(http, misc)
ASSERT_GE(n, sizeof(::sockaddr_in6) + 3*sizeof(int));
}
-TEST(http, url_host_port)
+TEST(http, url_url)
{
-tHttpUrl url;
- std::string ti="127.0.0.1";
-ASSERT_TRUE(url.SetHttpUrl(ti));
- ASSERT_EQ(url.sHost, ti);
- ASSERT_EQ(url.GetPort(), 80);
- ti="127.0.0.2:8080";
- ASSERT_TRUE(url.SetHttpUrl(ti));
- ASSERT_EQ(url.sHost, "127.0.0.2");
- ASSERT_EQ(url.GetPort(), 8080);
- ti="::1999";
- ASSERT_TRUE(url.SetHttpUrl(ti));
- ASSERT_EQ(url.sHost, "::1999");
+ tHttpUrl url;
+
+ ASSERT_TRUE(url.SetHttpUrl("example.org"));
+ ASSERT_EQ(url.sUserPass, "");
+ ASSERT_EQ(url.sHost, "example.org");
ASSERT_EQ(url.GetPort(), 80);
+ ASSERT_EQ(url.sPath, "/");
- ASSERT_TRUE(url.SetHttpUrl("[::fefe]"));
- ASSERT_EQ(url.sHost, "::fefe");
+ ASSERT_TRUE(url.SetHttpUrl("https://example.net;));
+ ASSERT_EQ(url.sUserPass, "");
+ ASSERT_EQ(url.sHost, "example.net");
+ ASSERT_EQ(url.GetPort(), 443);
+ ASSERT_EQ(url.sPath, "/");
+
+ ASSERT_TRUE(url.SetHttpUrl("http://example.org;));
+ ASSERT_EQ(url.sUserPass, "");
+ ASSERT_EQ(url.sHost, "example.org");
ASSERT_EQ(url.GetPort(), 80);
+ ASSERT_EQ(url.sPath, "/");
- ASSERT_TRUE(url.SetHttpUrl("[::abcd]:8080"));
- ASSERT_EQ(url.sHost, "::abcd");
+ ASSERT_TRUE(url.SetHttpUrl("http://alice:secr...@example.net:8080/foo/bar.html?abc=123=456;));
+ ASSERT_EQ(url.sUserPass, "alice:secret");
+ ASSERT_EQ(url.sHost, "example.net");
ASSERT_EQ(url.GetPort(), 8080);
+ ASSERT_EQ(url.sPath, "/foo/bar.html?abc=123=456");
+
+ // Non-RFC3986-compliant case: undelimited IPv6 literal
+ ASSERT_TRUE(url.SetHttpUrl("https://alice:se::cret@2001:db8::/bar;));
+ ASSERT_EQ(url.sUserPass, "alice:se::cret");
+ ASSERT_EQ(url.sHost, "2001:db8::");
+ ASSERT_EQ(url.GetPort(), 443);
+ ASSERT_EQ(url.sPath, "/bar");
+}
+
+TEST(http, url_host_port_ipv4)
+{
+ tHttpUrl url;
+ uint16_t defaultPort = url.GetPort();
+
+ ASSERT_TRUE(url.SetHttpUrl("0.0.0.0"));
+ ASSERT_EQ(url.sHost, "0.0.0.0");
+ ASSERT_EQ(url.GetPort(), defaultPort);
+
+ ASSERT_TRUE(url.SetHttpUrl("127.0.0.1"));
+ ASSERT_EQ(url.sHost, "127.0.0.1");
+ ASSERT_EQ(url.GetPort(), defaultPort);
+
+ ASSERT_TRUE(url.SetHttpUrl("10.20.30.40"));
+ ASSERT_EQ(url.sHost, "10.20.30.40");
+ ASSERT_EQ(url.GetPort(), defaultPort);
+
+ ASSERT_TRUE(url.SetHttpUrl("255.255.255.255"));
+ ASSERT_EQ(url.sHost, "255.255.255.255");
+ ASSERT_EQ(url.GetPort(), defaultPort);
+}
+
+TEST(http, url_host_port_ipv4_port)
+{
+ tHttpUrl url;
+
+ ASSERT_TRUE(url.SetHttpUrl("0.0.0.0:8000"));
+ ASSERT_EQ(url.sHost, "0.0.0.0");
+ ASSERT_EQ(url.GetPort(), 8000);
+
+ ASSERT_TRUE(url.SetHttpUrl("127.0.0.1:8001"));
+ ASSERT_EQ(url.sHost, "127.0.0.1");
+ ASSERT_EQ(url.GetPort(), 8001);
+
+ ASSERT_TRUE(url.SetHttpUrl("10.20.30.40:8002"));
+ ASSERT_EQ(url.sHost, "10.20.30.40");
+ ASSERT_EQ(url.GetPort(), 8002);
+
+ ASSERT_TRUE(url.SetHttpUrl("255.255.255.255:8003"));
+ ASSERT_EQ(url.sHost,