Changeset: 4533b80259fc for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/4533b80259fc
Modified Files:
monetdb5/modules/atoms/url.c
Branch: urlfuncs
Log Message:
fighting with junk urls
diffs (35 lines):
diff --git a/monetdb5/modules/atoms/url.c b/monetdb5/modules/atoms/url.c
--- a/monetdb5/modules/atoms/url.c
+++ b/monetdb5/modules/atoms/url.c
@@ -841,15 +841,26 @@ extractURLHost(str *retval, str *url, bo
} else {
l = s - h;
}
- if ((*retval = GDKmalloc(l + 1)) != NULL) {
- if (no_www && !strncmp(h, "wwww.", 4)) {
- strcpy_len(*retval, (h + 4), l + 1);
+ if (l > 4) {
+ if ((*retval = GDKmalloc(l + 1)) != NULL) {
+ if (no_www && strlen(h) > 4 && !strncmp(h, "www.", 4)) {
+ strcpy_len(*retval, (h + 4), l + 1);
+ } else {
+ strcpy_len(*retval, h, l + 1);
+ }
+ // clean up if not valid UTF-8
+ if (!checkUTF8(*retval)) {
+ printf("%s\n", h);
+ GDKfree(*retval);
+ *retval = GDKstrdup(str_nil);
+ }
} else {
- strcpy_len(*retval, h, l + 1);
+ throw(MAL, "url.getURLHost", SQLSTATE(HY013) MAL_MALLOC_FAIL);
}
} else {
- throw(MAL, "url.getURLHost", SQLSTATE(HY013) MAL_MALLOC_FAIL);
+ *retval = GDKstrdup(str_nil);
}
+
} else {
*retval = GDKstrdup(str_nil);
}
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]