Author: sebb
Date: Tue Jan 6 20:19:21 2015
New Revision: 1649932
URL: http://svn.apache.org/r1649932
Log:
VALIDATOR-235 UrlValidator rejects url with Unicode characters in domain label
or TLD
Allow IDN domains for all schemes
Modified:
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
Modified:
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
URL:
http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java?rev=1649932&r1=1649931&r2=1649932&view=diff
==============================================================================
---
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
(original)
+++
commons/proper/validator/trunk/src/main/java/org/apache/commons/validator/routines/UrlValidator.java
Tue Jan 6 20:19:21 2015
@@ -100,10 +100,6 @@ public class UrlValidator implements Ser
*/
public static final long ALLOW_LOCAL_URLS = 1 << 3;
- // Drop numeric, and "+-." for now
- // TODO does not allow for optional userinfo. Does not enforce initial
alphanumeric.
- private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\.";
-
/**
* This expression derived/taken from the BNF for URI (RFC2396).
*/
@@ -134,6 +130,11 @@ public class UrlValidator implements Ser
private static final String SCHEME_REGEX =
"^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*";
private static final Pattern SCHEME_PATTERN =
Pattern.compile(SCHEME_REGEX);
+ // Drop numeric, and "+-." for now
+ // TODO does not allow for optional userinfo.
+ // Validation of character set is done by isValidAuthority
+ private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\.";
+
private static final String AUTHORITY_REGEX =
"^([" + AUTHORITY_CHARS_REGEX + "]*)(:\\d*)?(.*)?";
// 1 2 3
@@ -144,7 +145,7 @@ public class UrlValidator implements Ser
private static final int PARSE_AUTHORITY_PORT = 2;
/**
- * Should always be empty.
+ * Should always be empty. The code currently allows spaces.
*/
private static final int PARSE_AUTHORITY_EXTRA = 3;
@@ -154,18 +155,9 @@ public class UrlValidator implements Ser
private static final String QUERY_REGEX = "^(.*)$";
private static final Pattern QUERY_PATTERN = Pattern.compile(QUERY_REGEX);
- private static final String LEGAL_ASCII_REGEX = "^\\p{ASCII}+$";
- private static final Pattern ASCII_PATTERN =
Pattern.compile(LEGAL_ASCII_REGEX);
-
private static final String PORT_REGEX = "^:(\\d{1,5})$";
private static final Pattern PORT_PATTERN = Pattern.compile(PORT_REGEX);
- // Pattern to extract domain for IDN conversion
- private static final Pattern HTTP_IDN_PATTERN =
Pattern.compile("(https?://)([^/]+)(.*)", Pattern.CASE_INSENSITIVE);
- private static final int PARSE_HTTP_IDN_SCHEME = 1;
- private static final int PARSE_HTTP_IDN_AUTH = 2;
- private static final int PARSE_HTTP_IDN_REST = 3;
-
/**
* Holds the set of current validation options.
*/
@@ -295,22 +287,6 @@ public class UrlValidator implements Ser
return false;
}
- if (!ASCII_PATTERN.matcher(value).matches()) {
- // Non-ASCII input, try and convert HTTP domain
- Matcher httpMatcher = HTTP_IDN_PATTERN.matcher(value);
- if (httpMatcher.lookingAt()) { // We have an http(s) URL
- value = httpMatcher.group(PARSE_HTTP_IDN_SCHEME)
- +
DomainValidator.unicodeToASCII(httpMatcher.group(PARSE_HTTP_IDN_AUTH))
- + httpMatcher.group(PARSE_HTTP_IDN_REST);
- if (!ASCII_PATTERN.matcher(value).matches()) {
- return false;
- }
- // Drop thru, we were able to convert the pattern
- } else {
- return false;
- }
- }
-
// Check the whole url address structure
Matcher urlMatcher = URL_PATTERN.matcher(value);
if (!urlMatcher.matches()) {
@@ -324,11 +300,11 @@ public class UrlValidator implements Ser
String authority = urlMatcher.group(PARSE_URL_AUTHORITY);
if ("file".equals(scheme) && "".equals(authority)) {
- // Special case - file: allows an empty authority
+ // Special case - file: allows an empty authority
} else {
- // Validate the authority
- if (!isValidAuthority(authority)) {
- return false;
+ // Validate the authority
+ if (!isValidAuthority(authority)) {
+ return false;
}
}
@@ -380,7 +356,7 @@ public class UrlValidator implements Ser
* If a RegexValidator was supplied and it matches, then the authority is
regarded
* as valid with no further checks, otherwise the method checks against the
* AUTHORITY_PATTERN and the DomainValidator (ALLOW_LOCAL_URLS)
- * @param authority Authority value to validate.
+ * @param authority Authority value to validate, alllows IDN
* @return true if authority (hostname and port) is valid.
*/
protected boolean isValidAuthority(String authority) {
@@ -392,8 +368,10 @@ public class UrlValidator implements Ser
if (authorityValidator != null &&
authorityValidator.isValid(authority)) {
return true;
}
+ // convert to ASCII if possible
+ final String authorityASCII =
DomainValidator.unicodeToASCII(authority);
- Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority);
+ Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authorityASCII);
if (!authorityMatcher.matches()) {
return false;
}
Modified:
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java?rev=1649932&r1=1649931&r2=1649932&view=diff
==============================================================================
---
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
(original)
+++
commons/proper/validator/trunk/src/test/java/org/apache/commons/validator/routines/UrlValidatorTest.java
Tue Jan 6 20:19:21 2015
@@ -153,6 +153,8 @@ public class UrlValidatorTest extends Te
assertTrue("пÑезиденÑ.ÑÑ should validate",
validator.isValid("http://пÑезиденÑ.ÑÑ"));
assertTrue("www.b\u00fccher.ch should validate",
validator.isValid("http://www.b\u00fccher.ch"));
assertFalse("www.\uFFFD.ch FFFD should fail",
validator.isValid("http://www.\uFFFD.ch"));
+ assertTrue("www.b\u00fccher.ch should validate",
validator.isValid("ftp://www.b\u00fccher.ch"));
+ assertFalse("www.\uFFFD.ch FFFD should fail",
validator.isValid("ftp://www.\uFFFD.ch"));
}
public void testValidator248() {