Changeset: 1494fe388891 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/1494fe388891
Modified Files:
monetdb5/modules/atoms/url.c
sql/scripts/12_url.sql
Branch: urlfuncs
Log Message:
relaxed version for extracting host from url
diffs (70 lines):
diff --git a/monetdb5/modules/atoms/url.c b/monetdb5/modules/atoms/url.c
--- a/monetdb5/modules/atoms/url.c
+++ b/monetdb5/modules/atoms/url.c
@@ -818,6 +818,46 @@ static str URLnoop(url *u, url *val)
return MAL_SUCCEED;
}
+/* Extract the host part of a URL.  This is the relaxed counterpart of
+ * URLgetHost: an invalid or nil URL raises no exception, and the nil
+ * string is returned in *retval instead.
+ * retval  receives a newly GDK-allocated string
+ * url     the URL text to inspect
+ * no_www  when true, a leading "www." is dropped from the host
+ * Only an allocation failure is reported as a MAL exception.
+ * NOTE(review): retval and url arrive as pointers but no_www by value;
+ * confirm this matches how MAL passes the bit argument. */
+static str
+extractURLHost(str *retval, str *url, bool no_www)
+{
+	const char *s;
+	const char *h = NULL;	/* host start, filled in by skip_authority */
+	const char *p = NULL;	/* presumably the port, when one is present */
+
+	*retval = NULL;
+	if (url != NULL && *url != NULL && !strNil(*url) &&
+		(s = skip_scheme(*url)) != NULL &&
+		(s = skip_authority(s, NULL, NULL, &h, &p)) != NULL &&
+		h != NULL) {
+		/* the host ends one char before p, or at s without a port */
+		size_t l = p != NULL ? (size_t) (p - h - 1) : (size_t) (s - h);
+		if (no_www && l >= 4 && strncmp(h, "www.", 4) == 0) {
+			h += 4;	/* shrink l too, so the copy stops at the host end */
+			l -= 4;
+		}
+		if ((*retval = GDKmalloc(l + 1)) != NULL) {
+			strcpy_len(*retval, h, l + 1);
+		}
+	} else {
+		*retval = GDKstrdup(str_nil);
+	}
+	if (*retval == NULL) {
+		throw(MAL, "url.getURLHost", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+	}
+	return MAL_SUCCEED;
+}
+
+
#include "mel.h"
mel_atom url_init_atoms[] = {
{ .name="url", .basetype="str", .fromstr=URLfromString, .tostr=URLtoString,
}, { .cmp=NULL }
@@ -833,7 +873,8 @@ mel_func url_init_funcs[] = {
command("url", "getDomain", URLgetDomain, false, "Extract Internet domain
from the URL", args(1,2, arg("",str),arg("u",url))),
command("url", "getExtension", URLgetExtension, false, "Extract the file
extension of the URL", args(1,2, arg("",str),arg("u",url))),
command("url", "getFile", URLgetFile, false, "Extract the last file name of
the URL", args(1,2, arg("",str),arg("u",url))),
- command("url", "getHost", URLgetHost, false, "Extract the server name from
the URL", args(1,2, arg("",str),arg("u",url))),
+ command("url", "getHost", URLgetHost, false, "Extract the server name from
the URL strict version", args(1,2, arg("",str),arg("u",url))),
+ command("url", "extractURLHost", extractURLHost, false, "Extract server name
from a URL relaxed version", args(1,3, arg("",str),arg("u",str), arg("no_www",
bit))),
command("url", "getPort", URLgetPort, false, "Extract the port id from the
URL", args(1,2, arg("",str),arg("u",url))),
command("url", "getProtocol", URLgetProtocol, false, "Extract the protocol
from the URL", args(1,2, arg("",str),arg("u",url))),
command("url", "getQuery", URLgetQuery, false, "Extract the query string from
the URL", args(1,2, arg("",str),arg("u",url))),
diff --git a/sql/scripts/12_url.sql b/sql/scripts/12_url.sql
--- a/sql/scripts/12_url.sql
+++ b/sql/scripts/12_url.sql
@@ -53,3 +53,6 @@ CREATE function newurl(protocol STRING,
RETURNS url
EXTERNAL NAME url."new";
GRANT EXECUTE ON FUNCTION newurl(STRING, STRING, STRING) TO PUBLIC;
+CREATE function sys.url_extract_host(url string, no_www bool) RETURNS STRING
+ EXTERNAL NAME url."extractURLHost";
+GRANT EXECUTE ON FUNCTION sys.url_extract_host(string, bool) TO PUBLIC; -- qualified to match the CREATE
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]