Hi,
Is there a reason we removed the URLDecoder.decode() calls in the {else} block
of this, in ContentTypeFilter ?
>
> if (SystemProperty.getBooleanProperty(SystemProperty.MAGNOLIA_UTF8_ENABLED)) {
Can we avoid constantly having to call SystemProperty.getBooleanProperty in
each and every corner where this might be needed ?
UnicodeNormalizer should not have to check this property - or if so, that'd be
only in the "autodetect" implementation of it, so that it'd use the
NonNormalizing impl if the property is set to false.
Why do we have to normalize again in CosMultipartRequestFilter, if
ContentTypeFilter already wrapped the request in a normalizing request wrapper ?
-g
Begin forwarded message:
> From: [email protected]
> Date: March 4, 2010 12:11:14GMT+01:00
> To: [email protected]
> Subject: [magnolia-svn] [32381] [MAGNOLIA-3009] NFD/NFC normalization added
> (force to NFC):
> Reply-To: "Magnolia SVN-List" <[email protected]>
>
> Revision
> 32381
> Author
> lucaboati
> Date
> 2010-03-04 12:11:13 +0100 (Thu, 04 Mar 2010)
> Log Message
>
> [MAGNOLIA-3009
> ] NFD/NFC normalization added (force to NFC):
> - ContentTypeFilter and CosMultipartRequestFilter normalizing each parameters
> - ImportXmlRootFilter normalizing sv:name of xml attributes
>
> Modified Paths
>
> •
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/filters/ContentTypeFilter.java
> •
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/filters/CosMultipartRequestFilter.java
> •
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/util/UnicodeNormalizer.java
> •
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/importexport/filters/ImportXmlRootFilter.java
> •
> community/magnolia/branches/utf8support/magnolia-core/src/test/java/info/magnolia/cms/util/UnicodeNormalizerTest.java
> Diff
>
> Modified:
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/filters/ContentTypeFilter.java
> (32380 => 32381)
>
> ---
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/filters/ContentTypeFilter.java
> 2010-03-04 11:07:34 UTC (rev 32380)
> +++
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/filters/ContentTypeFilter.java
> 2010-03-04 11:11:13 UTC (rev 32381)
>
> @@ -35,19 +35,25 @@
>
>
>
> import info.magnolia.cms.beans.config.MIMEMapping;
>
> import info.magnolia.cms.core.AggregationState;
>
> +import info.magnolia.cms.core.SystemProperty;
> +import info.magnolia.cms.util.UnicodeNormalizer;
>
> import info.magnolia.context.MgnlContext;
>
>
>
> import java.io.IOException;
>
> import java.io.UnsupportedEncodingException;
>
> import java.net.URLDecoder;
>
> +import java.util.HashMap;
> +import java.util.Map;
>
>
>
> import javax.servlet.FilterChain;
>
> import javax.servlet.ServletException;
>
> import javax.servlet.http.HttpServletRequest;
>
> +import javax.servlet.http.HttpServletRequestWrapper;
>
> import javax.servlet.http.HttpServletResponse;
>
>
>
> import org.apache.commons.lang.StringUtils;
>
>
>
> +
>
> /**
>
> * TODO : rename this filter. What it really does is initialize and setup
> the basic,
>
> * non-content related attributes of the AggregationState. ContentType could
> become an
>
> @@ -58,7 +64,8 @@
>
> * @author gjoseph
>
> * @version $Id$
>
> */
>
> -public class ContentTypeFilter extends AbstractMgnlFilter {
>
> +public class ContentTypeFilter extends AbstractMgnlFilter{
> +
>
> private static final org.slf4j.Logger log =
> org.slf4j.LoggerFactory.getLogger(ContentTypeFilter.class);
>
>
>
> public void doFilter(HttpServletRequest request, HttpServletResponse
> response, FilterChain chain) throws IOException, ServletException {
>
> @@ -69,7 +76,7 @@
>
> if (!StringUtils.isEmpty(query))
>
> {
>
> log.info(query);
>
> - url.append("?").append(query);
>
> + url.append("?").append(query);
>
> }
>
>
>
> final String characterEncoding =
> setupContentTypeAndCharacterEncoding(ext, request, response);
>
> @@ -81,13 +88,25 @@
>
> aggregationState.setCharacterEncoding(characterEncoding);
>
>
>
> // @todo this is temporary.. add a new filter?
>
> - aggregationState.setOriginalURI(URLDecoder.decode(originalUri,
> characterEncoding));
> - aggregationState.setOriginalURL(URLDecoder.decode(url.toString(),
> characterEncoding));
>
> + if
> (SystemProperty.getBooleanProperty(SystemProperty.MAGNOLIA_UTF8_ENABLED)) {
> + request = new UnicodeNormalizerRequestWrapper(request);
> + MgnlContext.push(request, response);
> + aggregationState.setOriginalURI(URLDecoder.decode(originalUri,
> characterEncoding));
> +
> aggregationState.setOriginalURL(URLDecoder.decode(url.toString(),
> characterEncoding));
> + }
> + else {
> + aggregationState.setOriginalURI(originalUri);
> + aggregationState.setOriginalURL(url.toString());
> + }
>
> aggregationState.setOriginalBrowserURI(originalUri);
>
> aggregationState.setOriginalBrowserURL(url.toString());
>
> aggregationState.setExtension(ext);
>
>
>
> chain.doFilter(request, response);
>
> +
> + if
> (SystemProperty.getBooleanProperty(SystemProperty.MAGNOLIA_UTF8_ENABLED)) {
> + MgnlContext.pop();
> + }
>
> }
>
>
>
> // TODO : test + simplification (substringAfterLast(uri, ".") probably
> does the trick !?
>
> @@ -115,4 +134,76 @@
>
> return characterEncoding;
>
> }
>
>
>
> + public class UnicodeNormalizerRequestWrapper extends
> HttpServletRequestWrapper
> + {
> +
> + private HttpServletRequest original;
> +
> + private Map parameters;
> +
> + /**
> + * @param request
> + */
> + public UnicodeNormalizerRequestWrapper(HttpServletRequest request)
> + {
> + super(request);
> + original = request;
> + }
> +
> + /**
> + * {...@inheritdoc}
> + */
> + @Override
> + public String getParameter(String name)
> + {
> + String[] values = getParameterValues(name);
> + if (values != null && values.length > 0)
> + {
> + return values[0];
> + }
> + return null;
> + }
> +
> + /**
> + * {...@inheritdoc}
> + */
> + @Override
> + public Map getParameterMap()
> + {
> + if (parameters == null)
> + {
> + parameters = new HashMap<String, String[]>();
> + for (Object key : original.getParameterMap().keySet())
> + {
> + String[] value =
> transform((String[])original.getParameterMap().get(key));
> + parameters.put(key, value);
> + }
> + }
> + return parameters;
> + }
> +
> + public String[] transform(String[] input)
> + {
> + String[] toNormalize = input;
> + if (toNormalize != null && toNormalize.length > 0)
> + {
> + for (int i = 0; i < toNormalize.length; i++)
> + {
> + toNormalize[i] =
> UnicodeNormalizer.normalizeNFC(toNormalize[i]);
> + }
> + }
> + return toNormalize;
> + }
> +
> + /**
> + * {...@inheritdoc}
> + */
> + @Override
> + public String[] getParameterValues(String name)
> + {
> + return (String[]) getParameterMap().get(name);
> + }
> +
> + }
> +
>
> }
>
> Modified:
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/filters/CosMultipartRequestFilter.java
> (32380 => 32381)
>
> ---
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/filters/CosMultipartRequestFilter.java
> 2010-03-04 11:07:34 UTC (rev 32380)
> +++
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/filters/CosMultipartRequestFilter.java
> 2010-03-04 11:11:13 UTC (rev 32381)
>
> @@ -35,6 +35,7 @@
>
>
>
> import info.magnolia.cms.beans.runtime.MultipartForm;
>
> import info.magnolia.cms.core.Path;
>
> +import info.magnolia.cms.util.UnicodeNormalizer;
>
> import info.magnolia.context.MgnlContext;
>
>
>
> import java.io.IOException;
>
> @@ -107,16 +108,20 @@
>
> while (params.hasMoreElements()) {
>
> String name = (String) params.nextElement();
>
> String value = multi.getParameter(name);
>
> - form.addParameter(name, value);
>
> + form.addParameter(name, UnicodeNormalizer.normalizeNFC(value));
>
> String[] s = multi.getParameterValues(name);
>
> if (s != null) {
>
> + for(int i = 0; i < s.length; i++)
> + {
> + s[i] = UnicodeNormalizer.normalizeNFC(s[i]);
> + }
>
> form.addparameterValues(name, s);
>
> }
>
> }
>
> Enumeration files = multi.getFileNames();
>
> while (files.hasMoreElements()) {
>
> String name = (String) files.nextElement();
>
> - form.addDocument(name, multi.getFilesystemName(name),
> multi.getContentType(name), multi.getFile(name));
>
> + form.addDocument(name,
> UnicodeNormalizer.normalizeNFC(multi.getFilesystemName(name)),
> multi.getContentType(name), multi.getFile(name));
>
> }
>
> request.setAttribute(MultipartForm.REQUEST_ATTRIBUTE_NAME, form);
>
> return form;
>
> Modified:
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/util/UnicodeNormalizer.java
> (32380 => 32381)
>
> ---
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/util/UnicodeNormalizer.java
> 2010-03-04 11:07:34 UTC (rev 32380)
> +++
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/cms/util/UnicodeNormalizer.java
> 2010-03-04 11:11:13 UTC (rev 32381)
>
> @@ -33,10 +33,13 @@
>
> */
>
> package info.magnolia.cms.util;
>
>
>
> +import info.magnolia.cms.core.SystemProperty;
>
> import info.magnolia.objectfactory.Components;
>
>
>
> +import java.io.UnsupportedEncodingException;
>
> import java.lang.reflect.InvocationTargetException;
>
> import java.lang.reflect.Method;
>
> +import java.util.Arrays;
>
>
>
> /**
>
> * A wrapper around java.text.Normalizer and com.ibm.icu.text.Normalizer;
> uses the former if present, or none
>
> @@ -67,7 +70,17 @@
>
> * Normalizes the given String to the NFC form.
>
> */
>
> public static String normalizeNFC(String in) {
>
> - return normalizer.normalizeNFC(in);
>
> + if
> (SystemProperty.getBooleanProperty(SystemProperty.MAGNOLIA_UTF8_ENABLED)) {
> + try {
> + log.debug("not normalized: " +
> Arrays.toString(in.getBytes("UTF-8")) + " (" + in + ")");
> + String out = normalizer.normalizeNFC(in);
> + log.debug(" normalized: " +
> Arrays.toString(out.getBytes("UTF-8")) + " (" + out + ")");
> + return out;
> + } catch (UnsupportedEncodingException e) {
> + // do nothing
> + }
> + }
> + return in;
>
> }
>
>
>
> public interface Normalizer {
>
> Modified:
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/importexport/filters/ImportXmlRootFilter.java
> (32380 => 32381)
>
> ---
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/importexport/filters/ImportXmlRootFilter.java
> 2010-03-04 11:07:34 UTC (rev 32380)
> +++
> community/magnolia/branches/utf8support/magnolia-core/src/main/java/info/magnolia/importexport/filters/ImportXmlRootFilter.java
> 2010-03-04 11:11:13 UTC (rev 32381)
>
> @@ -33,6 +33,8 @@
>
> */
>
> package info.magnolia.importexport.filters;
>
>
>
> +import info.magnolia.cms.util.UnicodeNormalizer;
> +
>
> import org.xml.sax.Attributes;
>
> import org.xml.sax.SAXException;
>
> import org.xml.sax.XMLReader;
>
> @@ -117,6 +119,7 @@
>
> // filter if it is the version store
>
> if ("sv:node".equals(qName)) { //$NON-NLS-1$
>
> String attName = atts.getValue("sv:name"); //$NON-NLS-1$
>
> + attName = UnicodeNormalizer.normalizeNFC(attName);
>
> // remember if there was a root node presend
>
> if ("jcr:root".equals(attName)) {
>
> this.rootNodeFound = true;
>
> Modified:
> community/magnolia/branches/utf8support/magnolia-core/src/test/java/info/magnolia/cms/util/UnicodeNormalizerTest.java
> (32380 => 32381)
>
> ---
> community/magnolia/branches/utf8support/magnolia-core/src/test/java/info/magnolia/cms/util/UnicodeNormalizerTest.java
> 2010-03-04 11:07:34 UTC (rev 32380)
> +++
> community/magnolia/branches/utf8support/magnolia-core/src/test/java/info/magnolia/cms/util/UnicodeNormalizerTest.java
> 2010-03-04 11:11:13 UTC (rev 32381)
>
> @@ -68,6 +68,7 @@
>
> SystemProperty.getProperties().clear();
>
> ComponentsTestUtil.clear();
>
>
> SystemProperty.setProperty("info.magnolia.cms.util.UnicodeNormalizer$Normalizer",
> "info.magnolia.cms.util.UnicodeNormalizer$AutoDetectNormalizer");
>
> + SystemProperty.setProperty("magnolia.utf8.enabled", "true");
>
> }
>
>
>
> protected void tearDown() throws Exception {
>
>
>
> ----------------------------------------------------------------
> For list details see
> http://www.magnolia-cms.com/home/community/mailing-lists.html
> To unsubscribe, E-mail to: <[email protected]>
> ----------------------------------------------------------------
----------------------------------------------------------------
For list details see
http://www.magnolia-cms.com/home/community/mailing-lists.html
To unsubscribe, E-mail to: <[email protected]>
----------------------------------------------------------------