Ate Douma pushed to branch release/1.1 at cms-community / hippo-services-htmlprocessor
Commits:
40e1f545 by Ate Douma at 2018-01-15T20:58:53+01:00
HHP-24 Improved cleaning

(cherry picked from commit 9681ab7074dc7f5286bcfaaa531b21482bf84082)
(cherry picked from commit 5344c23fdd5290707041ce1840d17cc006969a16)

- - - - -

1 changed file:

- src/main/java/org/onehippo/cms7/services/htmlprocessor/filter/WhitelistHtmlFilter.java

Changes:

=====================================
src/main/java/org/onehippo/cms7/services/htmlprocessor/filter/WhitelistHtmlFilter.java
=====================================
--- a/src/main/java/org/onehippo/cms7/services/htmlprocessor/filter/WhitelistHtmlFilter.java
+++ b/src/main/java/org/onehippo/cms7/services/htmlprocessor/filter/WhitelistHtmlFilter.java
@@ -1,5 +1,5 @@
 /*
- * Copyright 2017 Hippo B.V. (http://www.onehippo.com)
+ * Copyright 2017-2018 Hippo B.V. (http://www.onehippo.com)
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@ package org.onehippo.cms7.services.htmlprocessor.filter;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 import org.htmlcleaner.TagNode;
@@ -29,7 +30,9 @@ public class WhitelistHtmlFilter implements HtmlFilter {
 
     public static final Logger log = LoggerFactory.getLogger(WhitelistHtmlFilter.class);
 
-    private static final String JAVASCRIPT_PROTOCOL = "javascript:";
+    private static final String JAVASCRIPT_PROTOCOL = "javascript";
+    private static final String DATA_PROTOCOL = "data";
+    private static final Pattern CRLFTAB = Pattern.compile("[\r\n\t]");
 
     private final Map<String, Element> elements = new HashMap<>();
     private final boolean omitJavascriptProtocol;
@@ -61,8 +64,9 @@ public class WhitelistHtmlFilter implements HtmlFilter {
             // an omitted HTML envelope
             return null;
         }
-
-        filterAttributes(node);
+        if (nodeName != null) {
+            filterAttributes(node);
+        }
 
         for (final TagNode childNode : node.getChildTags()) {
             if (apply(childNode) == null) {
@@ -80,13 +84,25 @@ public class WhitelistHtmlFilter implements HtmlFilter {
                 .filter(attribute -> allowedElement.hasAttribute(attribute.getKey()))
                 .collect(Collectors.toMap(attribute -> attribute.getKey(), attribute -> {
                     final String value = attribute.getValue();
-                    final String normalizedValue = CharacterReferenceNormalizer.normalize(value.toLowerCase().trim());
-                    if (omitJavascriptProtocol && normalizedValue.startsWith(JAVASCRIPT_PROTOCOL)) {
+                    final String normalizedValue =
+                            cleanCRLFTAB(CharacterReferenceNormalizer.normalize(value.toLowerCase().trim()));
+                    if (omitJavascriptProtocol &&
+                            (normalizedValue.startsWith(JAVASCRIPT_PROTOCOL) ||
+                                    checkDataAttrValue(node.getName(), attribute.getKey(), normalizedValue))) {
                         return "";
                     }
-
                     return value;
                 }));
         node.setAttributes(attributes);
     }
+
+    private static String cleanCRLFTAB(final String value) {
+        return CRLFTAB.matcher(value).replaceAll("");
+    }
+
+    private boolean checkDataAttrValue(final String tagName, final String attrName, final String attrValue) {
+        return attrValue.startsWith(DATA_PROTOCOL)
+                ? ("a".equals(tagName) && "href".equals(attrName)) || ("object".equals(tagName) && "data".equals(attrName))
+                : false;
+    }
 }

View it on GitLab: https://code.onehippo.org/cms-community/hippo-services-htmlprocessor/commit/40e1f54548c5f6f966677edd905f08d04743f809

---
View it on GitLab: https://code.onehippo.org/cms-community/hippo-services-htmlprocessor/commit/40e1f54548c5f6f966677edd905f08d04743f809
You're receiving this email because of your account on code.onehippo.org.
_______________________________________________ Hippocms-svn mailing list Hippocms-svn@lists.onehippo.org https://lists.onehippo.org/mailman/listinfo/hippocms-svn