Tapestry does its own encoding because Jetty and Tomcat differ on whether they give you the decoded or the raw strings. Creating another option that would work the same way across servlet containers seemed to make sense at the time.
On Fri, Jun 18, 2010 at 8:50 AM, Nicolas Bouillon <nico...@bouil.org> wrote: > Hi, > > Here is how I've overridden this behavior, to allow URLs with "%20" or other > chars in incoming requests. It is quite a copy/paste tweak of the original > UrlEncoderImpl from Tapestry 5.1.0.5. > > public class AppModule { > ..... > public static void contributeServiceOverride( > MappedConfiguration<Class, Object> configuration) { > configuration.add(URLEncoder.class, new MyURLEncoderImpl()); > } > ..... > } > > > package org.bouil.tapestry.services; > > import java.io.UnsupportedEncodingException; > import java.util.BitSet; > > import org.apache.tapestry5.ioc.internal.util.Defense; > import org.apache.tapestry5.services.URLEncoder; > > public class MyURLEncoderImpl implements URLEncoder { > static final String ENCODED_NULL = "$N"; > static final String ENCODED_BLANK = "$B"; > > /** > * Bit set indicating which character are safe to pass through (when > * encoding or decoding) as-is. All other characters are encoded as a > kind > * of unicode escape. 
> */ > private final BitSet safeForInput = new BitSet(128); > private final BitSet safeForOutput = new BitSet(128); > > { > > markSafeForInput("aàâäbcçĉdeéèêëfgĝhĥiïîjĵklmnoôöpqrsŝtuùûüvwxyzæœ"); > > markSafeForInput("AÀÂÄBCÇĈDEÉÈÊËFGĜHĤIÏÎĤJĴKLMNOÔÖPQRSŜTUÙÛÜVWXYZÆŒ"); > markSafeForInput("01234567890-_.:,'"); > > markSafeForOuput("abcdefghijklmnopqrstuvwxyz"); > markSafeForOuput("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); > markSafeForOuput("01234567890-_.:,'"); > } > > private void markSafeForInput(String s) { > for (char ch : s.toCharArray()) { > safeForInput.set(ch); > } > } > > private void markSafeForOuput(String s) { > for (char ch : s.toCharArray()) { > safeForOutput.set(ch); > } > } > > public String encode(String input) { > if (input == null) > return ENCODED_NULL; > > if (input.equals("")) > return ENCODED_BLANK; > > boolean dirty = false; > > int length = input.length(); > > StringBuilder output = new StringBuilder(length * 2); > > for (int i = 0; i < length; i++) { > char ch = input.charAt(i); > > if (ch == '$') { > output.append("$$"); > dirty = true; > continue; > } > > int chAsInt = ch; > > if (safeForOutput.get(chAsInt)) { > output.append(ch); > continue; > } > > try { > return java.net.URLEncoder.encode(new String(input), > "UTF-8"); > } catch (UnsupportedEncodingException e) { > throw new IllegalArgumentException(e); > } > // output.append(String.format("$%04x", chAsInt)); > // dirty = true; > } > > return dirty ? 
output.toString() : input; > } > > public String decode(String input) { > Defense.notNull(input, "input"); > > if (input.equals(ENCODED_NULL)) > return null; > > if (input.equals(ENCODED_BLANK)) > return ""; > > boolean dirty = false; > > int length = input.length(); > > StringBuilder output = new StringBuilder(length * 2); > > for (int i = 0; i < length; i++) { > char ch = input.charAt(i); > > if (ch == '$') { > dirty = true; > > if (i + 1 < length && input.charAt(i + 1) == '$') { > output.append('$'); > i++; > > dirty = true; > continue; > } > > if (i + 4 < length) { > String hex = input.substring(i + 1, i + 5); > > try { > int unicode = Integer.parseInt(hex, 16); > > output.append((char) unicode); > i += 4; > dirty = true; > continue; > } catch (NumberFormatException ex) { > // Ignore. > } > } > > throw new IllegalArgumentException( > String > .format( > "Input string '%s' is not valid; > the '$' character at position %d should be followed by another '$' or a > four digit hex number (a unicode value).", > input, i + 1)); > } > > if (!safeForInput.get(ch)) { > throw new IllegalArgumentException( > String > .format( > "Input string '%s' is not valid; > the character '%s' at position %d is not valid.", > input, ch, i + 1)); > } > > output.append(ch); > } > > return dirty ? output.toString() : input; > } > } > > > > On Fri, 18 Jun 2010 16:08:09 +0100, Joel Halbert <j...@su3analytics.com> > wrote: >> Tapestry appears to URL encode spaces as "$0020" >> >> e.g. >> http://localhost:8080/web/buy/sports$0020shoes >> >> >> I would much prefer to use standard encoding such as: >> >> http://localhost:8080/web/buy/sports+shoes >> or >> http://localhost:8080/web/buy/sports%20shoes >> >> >> Is it possible to configure or override this behaviour? > > --------------------------------------------------------------------- > To unsubscribe, e-mail: users-unsubscr...@tapestry.apache.org > For additional commands, e-mail: users-h...@tapestry.apache.org > > -- Howard M. 
Lewis Ship Creator of Apache Tapestry The source for Tapestry training, mentoring and support. Contact me to learn how I can get you up and productive in Tapestry fast! (971) 678-5210 http://howardlewisship.com