WICKET-4539 move UrlEncoder and UrlDecoder into wicket-util
Project: http://git-wip-us.apache.org/repos/asf/wicket/repo Commit: http://git-wip-us.apache.org/repos/asf/wicket/commit/64571736 Tree: http://git-wip-us.apache.org/repos/asf/wicket/tree/64571736 Diff: http://git-wip-us.apache.org/repos/asf/wicket/diff/64571736 Branch: refs/heads/master Commit: 6457173650c8158258704bc201e9ed84a2c2708b Parents: 535b2ee Author: Peter Ertl <[email protected]> Authored: Tue May 8 00:53:21 2012 +0200 Committer: Peter Ertl <[email protected]> Committed: Tue May 8 00:53:21 2012 +0200 ---------------------------------------------------------------------- .../org/apache/wicket/markup/html/form/Form.java | 2 +- .../wicket/markup/html/link/DownloadLink.java | 2 +- .../apache/wicket/protocol/http/RequestUtils.java | 2 +- .../protocol/http/mock/MockHttpServletRequest.java | 4 +- .../apache/wicket/protocol/http/WicketURLTest.java | 4 +- .../main/java/org/apache/wicket/request/Url.java | 2 + .../java/org/apache/wicket/request/UrlDecoder.java | 178 ------- .../java/org/apache/wicket/request/UrlEncoder.java | 370 --------------- .../org/apache/wicket/request/UrlEncoderTest.java | 52 -- .../apache/wicket/util/encoding/UrlDecoder.java | 178 +++++++ .../apache/wicket/util/encoding/UrlEncoder.java | 370 +++++++++++++++ .../wicket/util/encoding/UrlEncoderTest.java | 53 ++ 12 files changed, 610 insertions(+), 607 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-core/src/main/java/org/apache/wicket/markup/html/form/Form.java ---------------------------------------------------------------------- diff --git a/wicket-core/src/main/java/org/apache/wicket/markup/html/form/Form.java b/wicket-core/src/main/java/org/apache/wicket/markup/html/form/Form.java index f1ef050..b0e9db3 100644 --- a/wicket-core/src/main/java/org/apache/wicket/markup/html/form/Form.java +++ b/wicket-core/src/main/java/org/apache/wicket/markup/html/form/Form.java @@ -47,11 +47,11 @@ import 
org.apache.wicket.request.IRequestParameters; import org.apache.wicket.request.Request; import org.apache.wicket.request.Response; import org.apache.wicket.request.Url; -import org.apache.wicket.request.UrlDecoder; import org.apache.wicket.request.UrlRenderer; import org.apache.wicket.request.http.WebRequest; import org.apache.wicket.request.mapper.parameter.PageParameters; import org.apache.wicket.settings.IApplicationSettings; +import org.apache.wicket.util.encoding.UrlDecoder; import org.apache.wicket.util.lang.Args; import org.apache.wicket.util.lang.Bytes; import org.apache.wicket.util.string.AppendingStringBuffer; http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-core/src/main/java/org/apache/wicket/markup/html/link/DownloadLink.java ---------------------------------------------------------------------- diff --git a/wicket-core/src/main/java/org/apache/wicket/markup/html/link/DownloadLink.java b/wicket-core/src/main/java/org/apache/wicket/markup/html/link/DownloadLink.java index b3b8848..3974b18 100644 --- a/wicket-core/src/main/java/org/apache/wicket/markup/html/link/DownloadLink.java +++ b/wicket-core/src/main/java/org/apache/wicket/markup/html/link/DownloadLink.java @@ -21,10 +21,10 @@ import java.io.File; import org.apache.wicket.model.IModel; import org.apache.wicket.model.Model; import org.apache.wicket.request.IRequestCycle; -import org.apache.wicket.request.UrlEncoder; import org.apache.wicket.request.handler.resource.ResourceStreamRequestHandler; import org.apache.wicket.request.resource.ContentDisposition; import org.apache.wicket.settings.IResourceSettings; +import org.apache.wicket.util.encoding.UrlEncoder; import org.apache.wicket.util.file.Files; import org.apache.wicket.util.lang.Args; import org.apache.wicket.util.resource.FileResourceStream; http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-core/src/main/java/org/apache/wicket/protocol/http/RequestUtils.java 
---------------------------------------------------------------------- diff --git a/wicket-core/src/main/java/org/apache/wicket/protocol/http/RequestUtils.java b/wicket-core/src/main/java/org/apache/wicket/protocol/http/RequestUtils.java index 0d87a70..cf832bc 100644 --- a/wicket-core/src/main/java/org/apache/wicket/protocol/http/RequestUtils.java +++ b/wicket-core/src/main/java/org/apache/wicket/protocol/http/RequestUtils.java @@ -24,9 +24,9 @@ import java.util.List; import javax.servlet.http.HttpServletRequest; import org.apache.wicket.Application; -import org.apache.wicket.request.UrlDecoder; import org.apache.wicket.request.cycle.RequestCycle; import org.apache.wicket.request.mapper.parameter.PageParameters; +import org.apache.wicket.util.encoding.UrlDecoder; import org.apache.wicket.util.string.Strings; /** http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-core/src/main/java/org/apache/wicket/protocol/http/mock/MockHttpServletRequest.java ---------------------------------------------------------------------- diff --git a/wicket-core/src/main/java/org/apache/wicket/protocol/http/mock/MockHttpServletRequest.java b/wicket-core/src/main/java/org/apache/wicket/protocol/http/mock/MockHttpServletRequest.java index 3fa725d..e97ae3a 100755 --- a/wicket-core/src/main/java/org/apache/wicket/protocol/http/mock/MockHttpServletRequest.java +++ b/wicket-core/src/main/java/org/apache/wicket/protocol/http/mock/MockHttpServletRequest.java @@ -52,8 +52,8 @@ import org.apache.wicket.WicketRuntimeException; import org.apache.wicket.mock.MockRequestParameters; import org.apache.wicket.request.Url; import org.apache.wicket.request.Url.QueryParameter; -import org.apache.wicket.request.UrlDecoder; -import org.apache.wicket.request.UrlEncoder; +import org.apache.wicket.util.encoding.UrlDecoder; +import org.apache.wicket.util.encoding.UrlEncoder; import org.apache.wicket.util.file.File; import org.apache.wicket.util.io.IOUtils; import 
org.apache.wicket.util.string.StringValue; http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-core/src/test/java/org/apache/wicket/protocol/http/WicketURLTest.java ---------------------------------------------------------------------- diff --git a/wicket-core/src/test/java/org/apache/wicket/protocol/http/WicketURLTest.java b/wicket-core/src/test/java/org/apache/wicket/protocol/http/WicketURLTest.java index d49a858..2098ec8 100644 --- a/wicket-core/src/test/java/org/apache/wicket/protocol/http/WicketURLTest.java +++ b/wicket-core/src/test/java/org/apache/wicket/protocol/http/WicketURLTest.java @@ -16,8 +16,8 @@ */ package org.apache.wicket.protocol.http; -import org.apache.wicket.request.UrlDecoder; -import org.apache.wicket.request.UrlEncoder; +import org.apache.wicket.util.encoding.UrlDecoder; +import org.apache.wicket.util.encoding.UrlEncoder; import org.junit.Assert; import org.junit.Test; http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-request/src/main/java/org/apache/wicket/request/Url.java ---------------------------------------------------------------------- diff --git a/wicket-request/src/main/java/org/apache/wicket/request/Url.java b/wicket-request/src/main/java/org/apache/wicket/request/Url.java index acb4287..43941f4 100755 --- a/wicket-request/src/main/java/org/apache/wicket/request/Url.java +++ b/wicket-request/src/main/java/org/apache/wicket/request/Url.java @@ -24,6 +24,8 @@ import java.util.Iterator; import java.util.List; import java.util.Locale; +import org.apache.wicket.util.encoding.UrlDecoder; +import org.apache.wicket.util.encoding.UrlEncoder; import org.apache.wicket.util.lang.Args; import org.apache.wicket.util.lang.Generics; import org.apache.wicket.util.lang.Objects; http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-request/src/main/java/org/apache/wicket/request/UrlDecoder.java ---------------------------------------------------------------------- diff --git 
a/wicket-request/src/main/java/org/apache/wicket/request/UrlDecoder.java b/wicket-request/src/main/java/org/apache/wicket/request/UrlDecoder.java deleted file mode 100644 index 2c361fe..0000000 --- a/wicket-request/src/main/java/org/apache/wicket/request/UrlDecoder.java +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.wicket.request; - -import java.io.UnsupportedEncodingException; -import java.nio.charset.Charset; - -/** - * Adapted from java.net.URLDecoder, but defines instances for query string decoding versus URL path - * component decoding. - * <p/> - * The difference is important because a space is encoded as a + in a query string, but this is a - * valid value in a path component (and is therefore not decode back to a space). 
- * - * @author Doug Donohoe - * @see java.net.URLDecoder - * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a> - */ -public class UrlDecoder -{ - private final boolean decodePlus; - - /** - * Encoder used to decode name or value components of a query string.<br/> - * <br/> - * - * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&asis=thispart - */ - public static final UrlDecoder QUERY_INSTANCE = new UrlDecoder(true); - - /** - * Encoder used to decode components of a path.<br/> - * <br/> - * - * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart - */ - public static final UrlDecoder PATH_INSTANCE = new UrlDecoder(false); - - /** - * Create decoder - * - * @param decodePlus - * - whether to decode + to space - */ - private UrlDecoder(final boolean decodePlus) - { - this.decodePlus = decodePlus; - } - - /** - * @param s - * string to decode - * @param enc - * encoding to decode with - * @return decoded string - * @see java.net.URLDecoder#decode(String, String) - */ - public String decode(final String s, final Charset enc) - { - return decode(s, enc.name()); - } - - /** - * @param s - * string to decode - * @param enc - * encoding to decode with - * @return decoded string - * @see java.net.URLDecoder#decode(String, String) - */ - public String decode(final String s, final String enc) - { - if (s == null) - { - return null; - } - - boolean needToChange = false; - int numChars = s.length(); - StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars); - int i = 0; - - if (enc.length() == 0) - { - throw new RuntimeException(new UnsupportedEncodingException( - "URLDecoder: empty string enc parameter")); - } - - char c; - byte[] bytes = null; - while (i < numChars) - { - c = s.charAt(i); - switch (c) - { - case '+' : - sb.append(decodePlus ? 
' ' : '+'); - i++; - needToChange = true; - break; - - case '%' : - /* - * Starting with this instance of %, process all consecutive substrings of the - * form %xy. Each substring %xy will yield a byte. Convert all consecutive bytes - * obtained this way to whatever character(s) they represent in the provided - * encoding. - */ - try - { - // (numChars-i)/3 is an upper bound for the number - // of remaining bytes - if (bytes == null) - { - bytes = new byte[(numChars - i) / 3]; - } - int pos = 0; - - while (((i + 2) < numChars) && (c == '%')) - { - bytes[pos++] = (byte)Integer.parseInt(s.substring(i + 1, i + 3), 16); - i += 3; - if (i < numChars) - { - c = s.charAt(i); - } - } - - // A trailing, incomplete byte encoding such as - // "%x" will cause an exception to be thrown - if ((i < numChars) && (c == '%')) - { - throw new IllegalArgumentException( - "URLDecoder: Incomplete trailing escape (%) pattern"); - } - - try - { - sb.append(new String(bytes, 0, pos, enc)); - } - catch (UnsupportedEncodingException e) - { - throw new RuntimeException(e); - } - } - catch (NumberFormatException e) - { - throw new IllegalArgumentException( - "URLDecoder: Illegal hex characters in escape (%) pattern - " + - e.getMessage()); - } - needToChange = true; - break; - - default : - sb.append(c); - i++; - break; - } - } - - return (needToChange ? 
sb.toString() : s); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-request/src/main/java/org/apache/wicket/request/UrlEncoder.java ---------------------------------------------------------------------- diff --git a/wicket-request/src/main/java/org/apache/wicket/request/UrlEncoder.java b/wicket-request/src/main/java/org/apache/wicket/request/UrlEncoder.java deleted file mode 100644 index b5db2d1..0000000 --- a/wicket-request/src/main/java/org/apache/wicket/request/UrlEncoder.java +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.wicket.request; - -import java.io.CharArrayWriter; -import java.io.UnsupportedEncodingException; -import java.nio.charset.Charset; -import java.nio.charset.IllegalCharsetNameException; -import java.nio.charset.UnsupportedCharsetException; -import java.util.BitSet; - -import org.apache.wicket.util.lang.Args; - -/** - * Adapted from java.net.URLEncoder, but defines instances for query string encoding versus URL path - * component encoding. 
- * <p/> - * The difference is important because a space is encoded as a + in a query string, but this is a - * valid value in a path component (and is therefore not decode back to a space). - * - * @author Doug Donohoe - * @see java.net.URLEncoder - * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a> - */ -public class UrlEncoder -{ - /** - * encoder types - */ - public enum Type { - /** - * query type - */ - QUERY, - /** - * path type - */ - PATH, - /** - * full path type - */ - FULL_PATH - } - - // list of what not to decode - protected BitSet dontNeedEncoding; - - // E.g. "?" for FULL_PATH encoding when querystring has already been - // encoded. - private final char stopChar; - - // used in decoding - protected static final int caseDiff = ('a' - 'A'); - - /** - * Encoder used to encode name or value components of a query string.<br/> - * <br/> - * - * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&asis=thispart - */ - public static final UrlEncoder QUERY_INSTANCE = new UrlEncoder(Type.QUERY, '\0'); - - /** - * Encoder used to encode components of a path.<br/> - * <br/> - * - * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart - */ - public static final UrlEncoder PATH_INSTANCE = new UrlEncoder(Type.PATH, '\0'); - - /** - * Encoder used to encode all path segments. Querystring will be excluded.<br/> - * <br/> - * - * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart - */ - public static final UrlEncoder FULL_PATH_INSTANCE = new UrlEncoder(Type.FULL_PATH, '?'); - - /** - * Allow subclass to call constructor. 
- * - * @param type - * encoder type - * @param stopChar - * stop encoding when stopChar found - */ - protected UrlEncoder(final Type type, final char stopChar) - { - this.stopChar = stopChar; - - /* - * This note from java.net.URLEncoder ================================== - * - * The list of characters that are not encoded has been determined as follows: - * - * RFC 2396 states: ----- Data characters that are allowed in a URI but do not have a - * reserved purpose are called unreserved. These include upper and lower case letters, - * decimal digits, and a limited set of punctuation marks and symbols. - * - * unreserved = alphanum | mark - * - * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" - * - * Unreserved characters can be escaped without changing the semantics of the URI, but this - * should not be done unless the URI is being used in a context that does not allow the - * unescaped character to appear. ----- - * - * It appears that both Netscape and Internet Explorer escape all special characters from - * this list with the exception of "-", "_", ".", "*". While it is not clear why they are - * escaping the other characters, perhaps it is safest to assume that there might be - * contexts in which the others are unsafe if not escaped. Therefore, we will use the same - * list. It is also noteworthy that this is consistent with O'Reilly's - * "HTML: The Definitive Guide" (page 164). - * - * As a last note, Intenet Explorer does not encode the "@" character which is clearly not - * unreserved according to the RFC. We are being consistent with the RFC in this matter, as - * is Netscape. - * - * This bit added by Doug Donohoe ================================== RFC 3986 (2005) updates - * this (http://tools.ietf.org/html/rfc3986): - * - * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - * - * pct-encoded = "%" HEXDIG HEXDIG - * - * reserved = gen-delims / sub-delims - * - * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" - * - * sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" // -- PATH - * COMPONENT -- // - * - * path = (see RFC for all variations) path-abempty =( "/" segment ) segment =pchar pchar = - * unreserved / pct-encoded / sub-delims / ":" / "@" // -- QUERY COMPONENT -- // - * - * query =( pchar / "/" / "?" ) - */ - - // unreserved - dontNeedEncoding = new BitSet(256); - int i; - for (i = 'a'; i <= 'z'; i++) - { - dontNeedEncoding.set(i); - } - for (i = 'A'; i <= 'Z'; i++) - { - dontNeedEncoding.set(i); - } - for (i = '0'; i <= '9'; i++) - { - dontNeedEncoding.set(i); - } - dontNeedEncoding.set('-'); - dontNeedEncoding.set('.'); - dontNeedEncoding.set('_'); - // tilde encoded by java.net.URLEncoder version, but RFC is clear on this - dontNeedEncoding.set('~'); - - // sub-delims - dontNeedEncoding.set('!'); - dontNeedEncoding.set('$'); - // "&" needs to be encoded for query stings - // "(" and ")" probably don't need encoding, but we'll be conservative - dontNeedEncoding.set('*'); - // "+" needs to be encoded for query strings (since it means = - dontNeedEncoding.set(','); - // ";" encoded due to use in path and/or query as delim in some - // instances (e.g., jsessionid) - // "=" needs to be encoded for query strings - - // pchar - dontNeedEncoding.set(':'); // allowed and used in wicket interface - // params - dontNeedEncoding.set('@'); - - // encoding type-specific - switch (type) - { - // this code consistent with java.net.URLEncoder version - case QUERY : - // encoding a space to a + is done in the encode() method - dontNeedEncoding.set(' '); - // to allow direct passing of URL in query - dontNeedEncoding.set('/'); - - /* - * the below encoding of a ? is disabled because it interferes in portlet - * environments. as far as i can tell it will not interfere with the ability to pass - * around urls in the query string. however, should it cause problems we can - * re-enable it as portlet environments are not high priority. 
we can also add a - * switch somewhere to enable/disable this on applicaiton level. (WICKET-4019) - */ - - // to allow direct passing of URL in query - // dontNeedEncoding.set('?'); - break; - - // this added to deal with encoding a PATH component - case PATH : - // encode ' ' with a % instead of + in path portion - - // path component sub-delim values we do not need to escape - dontNeedEncoding.set('&'); - dontNeedEncoding.set('='); - dontNeedEncoding.set('+'); - // don't encode semicolon because it is used in ;jsessionid= - dontNeedEncoding.set(';'); - break; - - // same as path, but '/' will not be encoded - case FULL_PATH : - // encode ' ' with a % instead of + in path portion - - // path component sub-delim values we do not need to escape - dontNeedEncoding.set('&'); - dontNeedEncoding.set('='); - dontNeedEncoding.set('+'); - - dontNeedEncoding.set('/'); - break; - } - } - - /** - * @param s - * string to encode - * @param charset - * charset to use for encoding - * @return encoded string - * @see java.net.URLEncoder#encode(String, String) - */ - public String encode(final String s, final Charset charset) - { - return encode(s, charset.name()); - } - - /** - * @param s - * string to encode - * @param charsetName - * encoding to use - * @return encoded string - * @see java.net.URLEncoder#encode(String, String) - */ - public String encode(final String s, final String charsetName) - { - boolean needToChange = false; - StringBuilder out = new StringBuilder(s.length()); - Charset charset; - CharArrayWriter charArrayWriter = new CharArrayWriter(); - - Args.notNull(charsetName, "charsetName"); - - try - { - charset = Charset.forName(charsetName); - } - catch (IllegalCharsetNameException e) - { - throw new RuntimeException(new UnsupportedEncodingException(charsetName)); - } - catch (UnsupportedCharsetException e) - { - throw new RuntimeException(new UnsupportedEncodingException(charsetName)); - } - - boolean stopEncoding = false; - for (int i = 0; i < s.length();) - { 
- int c = s.charAt(i); - - if ((stopEncoding == false) && (c == stopChar)) - { - stopEncoding = true; - } - - // System.out.println("Examining character: " + c); - if ((stopEncoding == true) || dontNeedEncoding.get(c)) - { - if (c == ' ') - { - c = '+'; - needToChange = true; - } - // System.out.println("Storing: " + c); - out.append((char)c); - i++; - } - else - { - // convert to external encoding before hex conversion - do - { - charArrayWriter.write(c); - /* - * If this character represents the start of a Unicode surrogate pair, then pass - * in two characters. It's not clear what should be done if a bytes reserved in - * the surrogate pairs range occurs outside of a legal surrogate pair. For now, - * just treat it as if it were any other character. - */ - if ((c >= 0xD800) && (c <= 0xDBFF)) - { - /* - * System.out.println(Integer.toHexString(c) + " is high surrogate"); - */ - if ((i + 1) < s.length()) - { - int d = s.charAt(i + 1); - /* - * System.out.println("\tExamining " + Integer.toHexString(d)); - */ - if ((d >= 0xDC00) && (d <= 0xDFFF)) - { - /* - * System.out.println("\t" + Integer.toHexString(d) + " is low - * surrogate"); - */ - charArrayWriter.write(d); - i++; - } - } - } - i++; - } - while ((i < s.length()) && !dontNeedEncoding.get((c = s.charAt(i)))); - - charArrayWriter.flush(); - String str = new String(charArrayWriter.toCharArray()); - byte[] ba = str.getBytes(charset); - for (byte b : ba) - { - out.append('%'); - char ch = Character.forDigit((b >> 4) & 0xF, 16); - // converting to use uppercase letter as part of - // the hex value if ch is a letter. - if (Character.isLetter(ch)) - { - ch -= caseDiff; - } - out.append(ch); - ch = Character.forDigit(b & 0xF, 16); - if (Character.isLetter(ch)) - { - ch -= caseDiff; - } - out.append(ch); - } - charArrayWriter.reset(); - needToChange = true; - } - } - - return (needToChange ? 
out.toString() : s); - } -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-request/src/test/java/org/apache/wicket/request/UrlEncoderTest.java ---------------------------------------------------------------------- diff --git a/wicket-request/src/test/java/org/apache/wicket/request/UrlEncoderTest.java b/wicket-request/src/test/java/org/apache/wicket/request/UrlEncoderTest.java deleted file mode 100644 index f44234f..0000000 --- a/wicket-request/src/test/java/org/apache/wicket/request/UrlEncoderTest.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.wicket.request; - -import org.apache.wicket.util.crypt.CharEncoding; -import org.junit.Assert; -import org.junit.Test; - -/** - * Tests for {@link UrlDecoder} - */ -public class UrlEncoderTest extends Assert -{ - - /** - * <a href="https://issues.apache.org/jira/browse/WICKET-3721">WICKET-3721</a> Encode - * apostrophes because otherwise they get XML encoded by ComponentTag#writeOutput() to - * &#039; and eventually break links with javascript: - */ - @Test - public void encodeApostrophe() - { - assertEquals("someone%27s%20bad%20url", - UrlEncoder.FULL_PATH_INSTANCE.encode("someone's bad url", CharEncoding.UTF_8)); - } - - /** - * Do not encode semicolon in the Url's path because it is used in ';jsessionid=...' - * - * https://issues.apache.org/jira/browse/WICKET-4409 - */ - @Test - public void dontEncodeSemicolon() - { - String encoded = UrlEncoder.PATH_INSTANCE.encode("path;jsessionid=1234567890", CharEncoding.UTF_8); - assertEquals("path;jsessionid=1234567890", encoded); - } -} http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-util/src/main/java/org/apache/wicket/util/encoding/UrlDecoder.java ---------------------------------------------------------------------- diff --git a/wicket-util/src/main/java/org/apache/wicket/util/encoding/UrlDecoder.java b/wicket-util/src/main/java/org/apache/wicket/util/encoding/UrlDecoder.java new file mode 100644 index 0000000..5f6d756 --- /dev/null +++ b/wicket-util/src/main/java/org/apache/wicket/util/encoding/UrlDecoder.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.wicket.util.encoding; + +import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; + +/** + * Adapted from java.net.URLDecoder, but defines instances for query string decoding versus URL path + * component decoding. + * <p/> + * The difference is important because a space is encoded as a + in a query string, but this is a + * valid value in a path component (and is therefore not decode back to a space). + * + * @author Doug Donohoe + * @see java.net.URLDecoder + * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a> + */ +public class UrlDecoder +{ + private final boolean decodePlus; + + /** + * Encoder used to decode name or value components of a query string.<br/> + * <br/> + * + * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&asis=thispart + */ + public static final UrlDecoder QUERY_INSTANCE = new UrlDecoder(true); + + /** + * Encoder used to decode components of a path.<br/> + * <br/> + * + * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart + */ + public static final UrlDecoder PATH_INSTANCE = new UrlDecoder(false); + + /** + * Create decoder + * + * @param decodePlus + * - whether to decode + to space + */ + private UrlDecoder(final boolean decodePlus) + { + this.decodePlus = decodePlus; + } + + /** + * @param s + * string to decode + * @param enc + * encoding to decode with + * @return decoded string + * @see java.net.URLDecoder#decode(String, String) + */ + public String decode(final String s, final Charset enc) + { + return decode(s, 
enc.name()); + } + + /** + * @param s + * string to decode + * @param enc + * encoding to decode with + * @return decoded string + * @see java.net.URLDecoder#decode(String, String) + */ + public String decode(final String s, final String enc) + { + if (s == null) + { + return null; + } + + boolean needToChange = false; + int numChars = s.length(); + StringBuilder sb = new StringBuilder(numChars > 500 ? numChars / 2 : numChars); + int i = 0; + + if (enc.length() == 0) + { + throw new RuntimeException(new UnsupportedEncodingException( + "URLDecoder: empty string enc parameter")); + } + + char c; + byte[] bytes = null; + while (i < numChars) + { + c = s.charAt(i); + switch (c) + { + case '+' : + sb.append(decodePlus ? ' ' : '+'); + i++; + needToChange = true; + break; + + case '%' : + /* + * Starting with this instance of %, process all consecutive substrings of the + * form %xy. Each substring %xy will yield a byte. Convert all consecutive bytes + * obtained this way to whatever character(s) they represent in the provided + * encoding. 
+ */ + try + { + // (numChars-i)/3 is an upper bound for the number + // of remaining bytes + if (bytes == null) + { + bytes = new byte[(numChars - i) / 3]; + } + int pos = 0; + + while (((i + 2) < numChars) && (c == '%')) + { + bytes[pos++] = (byte)Integer.parseInt(s.substring(i + 1, i + 3), 16); + i += 3; + if (i < numChars) + { + c = s.charAt(i); + } + } + + // A trailing, incomplete byte encoding such as + // "%x" will cause an exception to be thrown + if ((i < numChars) && (c == '%')) + { + throw new IllegalArgumentException( + "URLDecoder: Incomplete trailing escape (%) pattern"); + } + + try + { + sb.append(new String(bytes, 0, pos, enc)); + } + catch (UnsupportedEncodingException e) + { + throw new RuntimeException(e); + } + } + catch (NumberFormatException e) + { + throw new IllegalArgumentException( + "URLDecoder: Illegal hex characters in escape (%) pattern - " + + e.getMessage()); + } + needToChange = true; + break; + + default : + sb.append(c); + i++; + break; + } + } + + return (needToChange ? sb.toString() : s); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-util/src/main/java/org/apache/wicket/util/encoding/UrlEncoder.java ---------------------------------------------------------------------- diff --git a/wicket-util/src/main/java/org/apache/wicket/util/encoding/UrlEncoder.java b/wicket-util/src/main/java/org/apache/wicket/util/encoding/UrlEncoder.java new file mode 100644 index 0000000..52044da --- /dev/null +++ b/wicket-util/src/main/java/org/apache/wicket/util/encoding/UrlEncoder.java @@ -0,0 +1,370 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.wicket.util.encoding; + +import java.io.CharArrayWriter; +import java.io.UnsupportedEncodingException; +import java.nio.charset.Charset; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.UnsupportedCharsetException; +import java.util.BitSet; + +import org.apache.wicket.util.lang.Args; + +/** + * Adapted from java.net.URLEncoder, but defines instances for query string encoding versus URL path + * component encoding. + * <p/> + * The difference is important because a space is encoded as a + in a query string, but this is a + * valid value in a path component (and is therefore not decode back to a space). + * + * @author Doug Donohoe + * @see java.net.URLEncoder + * @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC-2396</a> + */ +public class UrlEncoder +{ + /** + * encoder types + */ + public enum Type { + /** + * query type + */ + QUERY, + /** + * path type + */ + PATH, + /** + * full path type + */ + FULL_PATH + } + + // list of what not to decode + protected BitSet dontNeedEncoding; + + // E.g. "?" for FULL_PATH encoding when querystring has already been + // encoded. 
+ private final char stopChar; + + // used in decoding + protected static final int caseDiff = ('a' - 'A'); + + /** + * Encoder used to encode name or value components of a query string.<br/> + * <br/> + * + * For example: http://org.acme/notthis/northis/oreventhis?buthis=isokay&asis=thispart + */ + public static final UrlEncoder QUERY_INSTANCE = new UrlEncoder(Type.QUERY, '\0'); + + /** + * Encoder used to encode components of a path.<br/> + * <br/> + * + * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart + */ + public static final UrlEncoder PATH_INSTANCE = new UrlEncoder(Type.PATH, '\0'); + + /** + * Encoder used to encode all path segments. Querystring will be excluded.<br/> + * <br/> + * + * For example: http://org.acme/foo/thispart/orthispart?butnot=thispart + */ + public static final UrlEncoder FULL_PATH_INSTANCE = new UrlEncoder(Type.FULL_PATH, '?'); + + /** + * Allow subclass to call constructor. + * + * @param type + * encoder type + * @param stopChar + * stop encoding when stopChar found + */ + protected UrlEncoder(final Type type, final char stopChar) + { + this.stopChar = stopChar; + + /* + * This note from java.net.URLEncoder ================================== + * + * The list of characters that are not encoded has been determined as follows: + * + * RFC 2396 states: ----- Data characters that are allowed in a URI but do not have a + * reserved purpose are called unreserved. These include upper and lower case letters, + * decimal digits, and a limited set of punctuation marks and symbols. + * + * unreserved = alphanum | mark + * + * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + * + * Unreserved characters can be escaped without changing the semantics of the URI, but this + * should not be done unless the URI is being used in a context that does not allow the + * unescaped character to appear. 
----- + * + * It appears that both Netscape and Internet Explorer escape all special characters from + * this list with the exception of "-", "_", ".", "*". While it is not clear why they are + * escaping the other characters, perhaps it is safest to assume that there might be + * contexts in which the others are unsafe if not escaped. Therefore, we will use the same + * list. It is also noteworthy that this is consistent with O'Reilly's + * "HTML: The Definitive Guide" (page 164). + * + * As a last note, Internet Explorer does not encode the "@" character which is clearly not + * unreserved according to the RFC. We are being consistent with the RFC in this matter, as + * is Netscape. + * + * This bit added by Doug Donohoe ================================== RFC 3986 (2005) updates + * this (http://tools.ietf.org/html/rfc3986): + * + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * + * pct-encoded = "%" HEXDIG HEXDIG + * + * reserved = gen-delims / sub-delims + * + * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + * + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" // -- PATH + * COMPONENT -- // + * + * path = (see RFC for all variations) path-abempty = *( "/" segment ) segment = *pchar pchar = + * unreserved / pct-encoded / sub-delims / ":" / "@" // -- QUERY COMPONENT -- // + * + * query = *( pchar / "/" / "?" 
) + */ + + // unreserved + dontNeedEncoding = new BitSet(256); + int i; + for (i = 'a'; i <= 'z'; i++) + { + dontNeedEncoding.set(i); + } + for (i = 'A'; i <= 'Z'; i++) + { + dontNeedEncoding.set(i); + } + for (i = '0'; i <= '9'; i++) + { + dontNeedEncoding.set(i); + } + dontNeedEncoding.set('-'); + dontNeedEncoding.set('.'); + dontNeedEncoding.set('_'); + // tilde encoded by java.net.URLEncoder version, but RFC is clear on this + dontNeedEncoding.set('~'); + + // sub-delims + dontNeedEncoding.set('!'); + dontNeedEncoding.set('$'); + // "&" needs to be encoded for query stings + // "(" and ")" probably don't need encoding, but we'll be conservative + dontNeedEncoding.set('*'); + // "+" needs to be encoded for query strings (since it means = + dontNeedEncoding.set(','); + // ";" encoded due to use in path and/or query as delim in some + // instances (e.g., jsessionid) + // "=" needs to be encoded for query strings + + // pchar + dontNeedEncoding.set(':'); // allowed and used in wicket interface + // params + dontNeedEncoding.set('@'); + + // encoding type-specific + switch (type) + { + // this code consistent with java.net.URLEncoder version + case QUERY : + // encoding a space to a + is done in the encode() method + dontNeedEncoding.set(' '); + // to allow direct passing of URL in query + dontNeedEncoding.set('/'); + + /* + * the below encoding of a ? is disabled because it interferes in portlet + * environments. as far as i can tell it will not interfere with the ability to pass + * around urls in the query string. however, should it cause problems we can + * re-enable it as portlet environments are not high priority. we can also add a + * switch somewhere to enable/disable this on applicaiton level. 
(WICKET-4019) + */ + + // to allow direct passing of URL in query + // dontNeedEncoding.set('?'); + break; + + // this added to deal with encoding a PATH component + case PATH : + // encode ' ' with a % instead of + in path portion + + // path component sub-delim values we do not need to escape + dontNeedEncoding.set('&'); + dontNeedEncoding.set('='); + dontNeedEncoding.set('+'); + // don't encode semicolon because it is used in ;jsessionid= + dontNeedEncoding.set(';'); + break; + + // same as path, but '/' will not be encoded + case FULL_PATH : + // encode ' ' with a % instead of + in path portion + + // path component sub-delim values we do not need to escape + dontNeedEncoding.set('&'); + dontNeedEncoding.set('='); + dontNeedEncoding.set('+'); + + dontNeedEncoding.set('/'); + break; + } + } + + /** + * @param s + * string to encode + * @param charset + * charset to use for encoding + * @return encoded string + * @see java.net.URLEncoder#encode(String, String) + */ + public String encode(final String s, final Charset charset) + { + return encode(s, charset.name()); + } + + /** + * @param s + * string to encode + * @param charsetName + * encoding to use + * @return encoded string + * @see java.net.URLEncoder#encode(String, String) + */ + public String encode(final String s, final String charsetName) + { + boolean needToChange = false; + StringBuilder out = new StringBuilder(s.length()); + Charset charset; + CharArrayWriter charArrayWriter = new CharArrayWriter(); + + Args.notNull(charsetName, "charsetName"); + + try + { + charset = Charset.forName(charsetName); + } + catch (IllegalCharsetNameException e) + { + throw new RuntimeException(new UnsupportedEncodingException(charsetName)); + } + catch (UnsupportedCharsetException e) + { + throw new RuntimeException(new UnsupportedEncodingException(charsetName)); + } + + boolean stopEncoding = false; + for (int i = 0; i < s.length();) + { + int c = s.charAt(i); + + if ((stopEncoding == false) && (c == stopChar)) + { + 
stopEncoding = true; + } + + // System.out.println("Examining character: " + c); + if ((stopEncoding == true) || dontNeedEncoding.get(c)) + { + if (c == ' ') + { + c = '+'; + needToChange = true; + } + // System.out.println("Storing: " + c); + out.append((char)c); + i++; + } + else + { + // convert to external encoding before hex conversion + do + { + charArrayWriter.write(c); + /* + * If this character represents the start of a Unicode surrogate pair, then pass + * in two characters. It's not clear what should be done if a bytes reserved in + * the surrogate pairs range occurs outside of a legal surrogate pair. For now, + * just treat it as if it were any other character. + */ + if ((c >= 0xD800) && (c <= 0xDBFF)) + { + /* + * System.out.println(Integer.toHexString(c) + " is high surrogate"); + */ + if ((i + 1) < s.length()) + { + int d = s.charAt(i + 1); + /* + * System.out.println("\tExamining " + Integer.toHexString(d)); + */ + if ((d >= 0xDC00) && (d <= 0xDFFF)) + { + /* + * System.out.println("\t" + Integer.toHexString(d) + " is low + * surrogate"); + */ + charArrayWriter.write(d); + i++; + } + } + } + i++; + } + while ((i < s.length()) && !dontNeedEncoding.get((c = s.charAt(i)))); + + charArrayWriter.flush(); + String str = new String(charArrayWriter.toCharArray()); + byte[] ba = str.getBytes(charset); + for (byte b : ba) + { + out.append('%'); + char ch = Character.forDigit((b >> 4) & 0xF, 16); + // converting to use uppercase letter as part of + // the hex value if ch is a letter. + if (Character.isLetter(ch)) + { + ch -= caseDiff; + } + out.append(ch); + ch = Character.forDigit(b & 0xF, 16); + if (Character.isLetter(ch)) + { + ch -= caseDiff; + } + out.append(ch); + } + charArrayWriter.reset(); + needToChange = true; + } + } + + return (needToChange ? 
out.toString() : s); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/wicket/blob/64571736/wicket-util/src/test/java/org/apache/wicket/util/encoding/UrlEncoderTest.java ---------------------------------------------------------------------- diff --git a/wicket-util/src/test/java/org/apache/wicket/util/encoding/UrlEncoderTest.java b/wicket-util/src/test/java/org/apache/wicket/util/encoding/UrlEncoderTest.java new file mode 100644 index 0000000..6679a63 --- /dev/null +++ b/wicket-util/src/test/java/org/apache/wicket/util/encoding/UrlEncoderTest.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.wicket.util.encoding; + +import org.apache.wicket.util.crypt.CharEncoding; +import org.apache.wicket.util.encoding.UrlEncoder; +import org.junit.Assert; +import org.junit.Test; + +/** + * Tests for {@link org.apache.wicket.util.encoding.UrlDecoder} + */ +public class UrlEncoderTest extends Assert +{ + + /** + * <a href="https://issues.apache.org/jira/browse/WICKET-3721">WICKET-3721</a> Encode + * apostrophes because otherwise they get XML encoded by ComponentTag#writeOutput() to + * &#039; and eventually break links with javascript: + */ + @Test + public void encodeApostrophe() + { + assertEquals("someone%27s%20bad%20url", + UrlEncoder.FULL_PATH_INSTANCE.encode("someone's bad url", CharEncoding.UTF_8)); + } + + /** + * Do not encode semicolon in the Url's path because it is used in ';jsessionid=...' + * + * https://issues.apache.org/jira/browse/WICKET-4409 + */ + @Test + public void dontEncodeSemicolon() + { + String encoded = UrlEncoder.PATH_INSTANCE.encode("path;jsessionid=1234567890", CharEncoding.UTF_8); + assertEquals("path;jsessionid=1234567890", encoded); + } +}
