Ate Douma pushed to branch release/1.1 at cms-community / hippo-services-htmlprocessor


Commits:
40e1f545 by Ate Douma at 2018-01-15T20:58:53+01:00
HHP-24 Improved cleaning

(cherry picked from commit 9681ab7074dc7f5286bcfaaa531b21482bf84082)
(cherry picked from commit 5344c23fdd5290707041ce1840d17cc006969a16)

- - - - -


1 changed file:

- src/main/java/org/onehippo/cms7/services/htmlprocessor/filter/WhitelistHtmlFilter.java


Changes:

=====================================
src/main/java/org/onehippo/cms7/services/htmlprocessor/filter/WhitelistHtmlFilter.java
=====================================
--- a/src/main/java/org/onehippo/cms7/services/htmlprocessor/filter/WhitelistHtmlFilter.java
+++ b/src/main/java/org/onehippo/cms7/services/htmlprocessor/filter/WhitelistHtmlFilter.java
@@ -1,5 +1,5 @@
 /*
- *  Copyright 2017 Hippo B.V. (http://www.onehippo.com)
+ *  Copyright 2017-2018 Hippo B.V. (http://www.onehippo.com)
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@ package org.onehippo.cms7.services.htmlprocessor.filter;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 import org.htmlcleaner.TagNode;
@@ -29,7 +30,9 @@ public class WhitelistHtmlFilter implements HtmlFilter {
 
     public static final Logger log = LoggerFactory.getLogger(WhitelistHtmlFilter.class);
 
-    private static final String JAVASCRIPT_PROTOCOL = "javascript:";
+    private static final String JAVASCRIPT_PROTOCOL = "javascript";
+    private static final String DATA_PROTOCOL = "data";
+    private static final Pattern CRLFTAB = Pattern.compile("[\r\n\t]");
 
     private final Map<String, Element> elements = new HashMap<>();
     private final boolean omitJavascriptProtocol;
@@ -61,8 +64,9 @@ public class WhitelistHtmlFilter implements HtmlFilter {
             // an omitted HTML envelope
             return null;
         }
-
-        filterAttributes(node);
+        if (nodeName != null) {
+            filterAttributes(node);
+        }
 
         for (final TagNode childNode : node.getChildTags()) {
             if (apply(childNode) == null) {
@@ -80,13 +84,25 @@ public class WhitelistHtmlFilter implements HtmlFilter {
                 .filter(attribute -> allowedElement.hasAttribute(attribute.getKey()))
                 .collect(Collectors.toMap(attribute -> attribute.getKey(), attribute -> {
                     final String value = attribute.getValue();
-                    final String normalizedValue = CharacterReferenceNormalizer.normalize(value.toLowerCase().trim());
-                    if (omitJavascriptProtocol && normalizedValue.startsWith(JAVASCRIPT_PROTOCOL)) {
+                    final String normalizedValue =
+                            cleanCRLFTAB(CharacterReferenceNormalizer.normalize(value.toLowerCase().trim()));
+                    if (omitJavascriptProtocol &&
+                            (normalizedValue.startsWith(JAVASCRIPT_PROTOCOL) ||
+                                    checkDataAttrValue(node.getName(), attribute.getKey(), normalizedValue))) {
                         return "";
                     }
-
                     return value;
                 }));
         node.setAttributes(attributes);
     }
+
+    private static String cleanCRLFTAB(final String value) {
+        return CRLFTAB.matcher(value).replaceAll("");
+    }
+
+    private boolean checkDataAttrValue(final String tagName, final String attrName, final String attrValue) {
+        return attrValue.startsWith(DATA_PROTOCOL)
+                ? ("a".equals(tagName) && "href".equals(attrName)) || ("object".equals(tagName) && "data".equals(attrName))
+                : false;
+    }
 }



View it on GitLab: 
https://code.onehippo.org/cms-community/hippo-services-htmlprocessor/commit/40e1f54548c5f6f966677edd905f08d04743f809

---
View it on GitLab: 
https://code.onehippo.org/cms-community/hippo-services-htmlprocessor/commit/40e1f54548c5f6f966677edd905f08d04743f809
You're receiving this email because of your account on code.onehippo.org.
_______________________________________________
Hippocms-svn mailing list
Hippocms-svn@lists.onehippo.org
https://lists.onehippo.org/mailman/listinfo/hippocms-svn

Reply via email to