rombert commented on code in PR #28: URL: https://github.com/apache/sling-org-apache-sling-xss/pull/28#discussion_r915678971
########## src/main/java/org/apache/sling/xss/impl/xml/Attribute.java: ########## @@ -0,0 +1,142 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Attribute { + + private String name = null; Review Comment: There is no need to initialise variables to null, please remove that. ########## src/main/java/org/apache/sling/xss/impl/xml/Attribute.java: ########## @@ -0,0 +1,142 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Attribute { + + private String name = null; + private String description = null; + private String onInvalid = null; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList = null; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList = null; + + // private List<Pattern> patternList = regexpList.stream().map(regexp -> + // regexp.getPattern()) + // .collect(Collectors.toList()); + + @JsonCreator + public Attribute(@JacksonXmlProperty(localName = "name", isAttribute = true) String name, + // @JacksonXmlElementWrapper(localName = "regexp-list") + @JacksonXmlProperty(localName = "regexp") List<Regexp> allowedRegexps, + // @JacksonXmlElementWrapper(localName = "literal-list") Review Comment: Please remove commented out code ########## src/main/java/org/owasp/html/DynamicAttributesSanitizerPolicy.java: ########## @@ -0,0 +1,134 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.owasp.html; + +import java.lang.reflect.InvocationTargetException; + +import java.lang.reflect.Method; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; + +import javax.annotation.Nullable; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +public class DynamicAttributesSanitizerPolicy extends ElementAndAttributePolicyBasedSanitizerPolicy { Review Comment: Please document this class. We should say: 1. Why do we need to create this class 2. Why we are placing it in the `org.owasp.html` package. ########## src/main/java/org/apache/sling/xss/impl/XSSFilterImpl.java: ########## @@ -229,18 +234,18 @@ public AntiSamyPolicy getActivePolicy() { private boolean runHrefValidation(@NotNull String url) { // Same logic as in org.owasp.validator.html.scan.MagicSAXFilter.startElement() - boolean isValid = hrefAttribute.containsAllowedValue(url.toLowerCase()); + String urlLowerCase = url.toLowerCase(); + boolean isValid = hrefAttribute.containsAllowedValue(urlLowerCase); if (!isValid) { try { - isValid = hrefAttribute.matchesAllowedExpression(url.toLowerCase()); + isValid = hrefAttribute.matchesAllowedExpression(urlLowerCase); } catch (StackOverflowError e) { - logger.debug("Detected a StackOverflowError when validating url {} with configured regexes. Trying fallback.", url); + logger.debug( + "Detected a StackOverflowError when validating url {} with configured regexes. Trying fallback.", url); Review Comment: Please revert this formatting change. ########## src/main/java/org/apache/sling/xss/impl/AntiSamyHtmlSanitizer.java: ########## @@ -0,0 +1,95 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; + +import org.apache.sling.xss.impl.xml.Policy; +import org.owasp.html.DynamicAttributesSanitizerPolicy; +import org.owasp.html.Handler; +import org.owasp.html.HtmlSanitizer; +import org.owasp.html.HtmlStreamEventReceiver; +import org.owasp.html.HtmlStreamRenderer; +import org.owasp.html.PolicyFactory; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +public class AntiSamyHtmlSanitizer { + + public static final Object DOM = "DOM"; + public static final Object SAX = "SAX"; + + private CustomPolicy custumPolicy; + private ImmutableMap policies; + private ImmutableSet<String> textContainers; + + public AntiSamyHtmlSanitizer() { + } + + public AntiSamyHtmlSanitizer(Policy policy) { + this.custumPolicy = new CustomPolicy(policy); + policies = reflectionGetPolicies(custumPolicy.getCustomPolicyFactory()); + textContainers = reflectionGetTextContainers(custumPolicy.getCustomPolicyFactory()); + } + Review Comment: Can this constructor be removed? I think it's not used and this class would be broken without a Policy class anyway. ########## src/main/java/org/apache/sling/xss/impl/xml/Policy.java: ########## @@ -0,0 +1,391 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import org.apache.sling.xss.impl.PolicyException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.ctc.wstx.stax.WstxInputFactory; +import com.ctc.wstx.stax.WstxOutputFactory; +import com.fasterxml.jackson.dataformat.xml.XmlMapper; + +public class Policy { + + private static final String DIRECTIVE_EMBED_STYLE_SHEETS = "embedStyleSheets"; + + public static class CssPolicy { + + private final Map<String, Property> cssRules; + private final IncludeExcludeMatcher elementMatcher; + private final IncludeExcludeMatcher classMatcher; + private final IncludeExcludeMatcher idMatcher; + private final IncludeExcludeMatcher pseudoElementMatcher; + private final IncludeExcludeMatcher attributeMatcher; + + public CssPolicy(Map<String, Property> cssrules, Map<String, Pattern> commonRegExps, Map<String, String> directives) { + this.cssRules = Collections.unmodifiableMap(cssrules); + this.elementMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssElementSelector"), + commonRegExps.get("cssElementExclusion")); + this.classMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssClassSelector"), + commonRegExps.get("cssClassExclusion")); + this.idMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssIDSelector"), + commonRegExps.get("cssIDExclusion")); + this.pseudoElementMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssPseudoElementSelector"), + commonRegExps.get("cssPseudoElementExclusion")); + this.attributeMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssAttributeSelector"), + commonRegExps.get("cssAttributeExclusion")); + } + + public Map<String, Property> getCssRules() { + return cssRules; + } + + public boolean isValidElementName(String name) { + return elementMatcher.matches(name); + } + + public boolean isValidClassName(String name) { + return classMatcher.matches(name); + } + + public boolean isValidId(String name) { + return idMatcher.matches(name); + } + + public boolean isValidPseudoElementName(String name) { + return pseudoElementMatcher.matches(name); + } + + public boolean isValidAttributeSelector(String name) { + return attributeMatcher.matches(name); + } + } + + protected final Map<String, Pattern> commonRegularExpressions = new HashMap<>(); + protected final Map<String, Attribute> commonAttributes = new HashMap<>(); + protected final Map<String, Tag> tagRules = new HashMap<>(); + protected final Map<String, Property> cssRules = new HashMap<>(); + protected final Map<String, String> directives = new HashMap<>(); + protected final Map<String, Attribute> globalAttributes = new HashMap<>(); + protected final Map<String, Attribute> dynamicAttributes = new HashMap<>(); + protected List<String> allowedEmptyTags = new ArrayList<>(); + protected final List<String> requireClosingTags = new ArrayList<>(); + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + public Map<String, String> getDirectives() { + return directives; + } + + public List<String> getRequireClosingTags() { + return requireClosingTags; + } + + public Map<String, Pattern> getCommonRegularExpressions() { + return commonRegularExpressions; + } + + public Map<String, Attribute> getGlobalAttributes() { + return globalAttributes; + } + + public Map<String, Attribute> getCommonAttributes() { + return commonAttributes; + } + + public Map<String, Property> getCssRules() { + return cssRules; + } + + public List<String> getAllowedEmptyTags() { + return allowedEmptyTags; + } + + public Map<String, Tag> getTagRules() { + return tagRules; + } + + public Map<String, Attribute> getDynamicAttributes() { + return dynamicAttributes; + } + + public CssPolicy getCssPolicy() { + + return new CssPolicy(cssRules, + commonRegularExpressions, directives); + } + + protected Policy(InputStream input) throws PolicyException, XMLStreamException, IOException { + AntiSamyRules root = null; + root = getTopLevelElement(input); + init(root); + } + + public static Policy getInstance(InputStream bais) throws PolicyException, XMLStreamException, IOException { + return new Policy(bais); + } + + public Tag getTagByLowercaseName(String a) { + return tagRules.get(a); + } + + public AntiSamyRules getTopLevelElement(InputStream input) + throws IOException, XMLStreamException { + XMLInputFactory xmlInputFactory = new WstxInputFactory(); + XMLStreamReader xmlStreamReader; + AntiSamyRules antiSamyRules = null; + xmlStreamReader = xmlInputFactory.createXMLStreamReader(input); + XmlMapper mapper = new XmlMapper(xmlInputFactory, new WstxOutputFactory()); + antiSamyRules = mapper.readValue(xmlStreamReader, AntiSamyRules.class); + if ( "true".equals(antiSamyRules.getDirectivesByName().get(DIRECTIVE_EMBED_STYLE_SHEETS)) ) { + logger.warn("Unsupported configuration directive {} is set to true and will be ignored", DIRECTIVE_EMBED_STYLE_SHEETS); + } + return antiSamyRules; + } + + private void init(AntiSamyRules topLevelElement) throws PolicyException { + parseCommonRegExps(topLevelElement.getRegexpList()); + parseDirectives(topLevelElement.getDirectiveList()); + parseCommonAttributes(topLevelElement.getCommonAttributeList()); + parseGlobalAttributes(topLevelElement.getGlobalTagAttributes().getGlobalTagAttributeList()); + parseDynamicAttributes(topLevelElement.getDynamicTagAttribute().getDynamicTagAttributeList()); + parseTagRules(topLevelElement.getTagRulesList()); + parseCSSRules(topLevelElement.getPropertyList()); + + parseAllowedEmptyTags(topLevelElement.getAllowedEmptyTags()); + } + + /** + * Go through the <common-regexps> section of the policy file. + * + * @param root Top level of <common-regexps> + * @param commonRegularExpressions2 the antisamy regexp objects + */ + private void parseCommonRegExps(List<Regexp> root) { + for (Regexp regex : root) { + String name = regex.getName(); + Pattern regexp = Pattern.compile(regex.getValue(), + Pattern.DOTALL); + commonRegularExpressions.put(name, regexp); + } + } + + /** + * Go through <directives> section of the policy file. + * + * @param root Top level of <directives> + * @param directives The directives map to update + */ + private void parseDirectives(List<Directive> root) { + for (Directive directive : root) { + String name = directive.getName(); + String value = directive.getValue(); + directives.put(name, value); + } + } + + private void parseCommonAttributes(List<Attribute> root) { + + for (Attribute attribute : root) { + + List<Regexp> allowedRegexps = getAllowedRegexps(attribute.getRegexpList()); + Attribute newAttribute = new Attribute(attribute.getName(), allowedRegexps, attribute.getLiteralList(), + attribute.getOnInvalid(), attribute.getDescription()); + commonAttributes.put(attribute.getName().toLowerCase(), newAttribute); + } + + } + + // /** + // * Go through <allowed-empty-tags> section of the policy file. + // * + // * @param allowedEmptyTagsListNode Top level of <allowed-empty-tags> + // * @param allowedEmptyTags The tags that can be empty + // */ + private void parseAllowedEmptyTags(AllowedEmptyTags allowedEmptyTagsList) throws PolicyException { + if (allowedEmptyTagsList != null) { + allowedEmptyTags = allowedEmptyTagsList.getLiterals(); + } else + allowedEmptyTags.addAll(Arrays.asList( Review Comment: This does not look right, we should not hardcode a list of tags. Is it needed? ########## src/main/java/org/apache/sling/xss/impl/xml/Attribute.java: ########## @@ -0,0 +1,142 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Attribute { + + private String name = null; + private String description = null; + private String onInvalid = null; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList = null; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList = null; + + // private List<Pattern> patternList = regexpList.stream().map(regexp -> + // regexp.getPattern()) + // .collect(Collectors.toList()); + + @JsonCreator + public Attribute(@JacksonXmlProperty(localName = "name", isAttribute = true) String name, + // @JacksonXmlElementWrapper(localName = "regexp-list") Review Comment: Please remove commented out code ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": + policyBuilder.allowElements(tag.getValue().getName()); + + break; + // filter: remove tags, but keep content, + case "filter": + break; + // remove: remove tag and contents + case "remove": + policyBuilder.disallowElements(tag.getValue().getName()); + break; + + // validate is also the default + // validate: keep content as long as it passes rules, + default: + policyBuilder.allowElements(tag.getValue().getName()); + boolean styleSeen = false; + // get the allowed Attributes for the tag + Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap(); + if (allowedAttributes != null && allowedAttributes.size() > 0) { Review Comment: Use empty collections as defaults to remove this check. ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": + policyBuilder.allowElements(tag.getValue().getName()); + + break; + // filter: remove tags, but keep content, + case "filter": + break; + // remove: remove tag and contents + case "remove": + policyBuilder.disallowElements(tag.getValue().getName()); + break; + + // validate is also the default + // validate: keep content as long as it passes rules, + default: Review Comment: Should we really fallback to "validate" in case of an unknown action? I think we should fail if we find an unknown value. Perhaps run these actions on "validate" and ""/null? ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": + policyBuilder.allowElements(tag.getValue().getName()); + + break; + // filter: remove tags, but keep content, + case "filter": + break; + // remove: remove tag and contents + case "remove": + policyBuilder.disallowElements(tag.getValue().getName()); + break; + + // validate is also the default + // validate: keep content as long as it passes rules, + default: + policyBuilder.allowElements(tag.getValue().getName()); + boolean styleSeen = false; + // get the allowed Attributes for the tag + Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap(); + if (allowedAttributes != null && allowedAttributes.size() > 0) { + + // if there are allowed Attributes, map over them + for (Attribute attribute : allowedAttributes.values()) { + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) + styleSeen = true; + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue) + .onElements(tag.getValue().getName()); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { Review Comment: Use empty collections as defaults to remove this check. ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": + policyBuilder.allowElements(tag.getValue().getName()); + + break; + // filter: remove tags, but keep content, + case "filter": + break; + // remove: remove tag and contents + case "remove": + policyBuilder.disallowElements(tag.getValue().getName()); + break; + + // validate is also the default + // validate: keep content as long as it passes rules, + default: + policyBuilder.allowElements(tag.getValue().getName()); + boolean styleSeen = false; + // get the allowed Attributes for the tag + Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap(); + if (allowedAttributes != null && allowedAttributes.size() > 0) { + + // if there are allowed Attributes, map over them + for (Attribute attribute : allowedAttributes.values()) { + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) + styleSeen = true; + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue) + .onElements(tag.getValue().getName()); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()) + .matching(matchesToPatterns(regexsFromAttribute)) + .onElements(tag.getValue().getName()); + } else { + policyBuilder.allowAttributes(attribute.getName()).onElements(tag.getValue().getName()); + } + } + + if (!styleSeen) { + policyBuilder.allowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME) + .matching(cssValidator.newCssAttributePolicy()).onElements(tag.getValue().getName()); + } + } + break; + } + } + + // disallow style tag on specific elements + policyBuilder.disallowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME) + .onElements(cssValidator.getDisallowedTagNames().toArray(new String[0])); + + // ---------- dynamic attributes ------------ + Map<String, Attribute> dynamicAttributes = new HashMap<>(); + // checks if the dynamic attributes are allowed + if (policy.getDirectives().get("allowDynamicAttributes").equals("true")) { + dynamicAttributes.putAll(policy.getDynamicAttributes()); + for (Attribute attribute : dynamicAttributes.values()) { + if (attribute.getOnInvalid().equals("removeTag")) { Review Comment: We should extract these string values as constants somewhere and reference them from the code. ########## src/main/java/org/apache/sling/xss/impl/xml/Attribute.java: ########## @@ -0,0 +1,142 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Attribute { + + private String name = null; + private String description = null; + private String onInvalid = null; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList = null; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList = null; + + // private List<Pattern> patternList = regexpList.stream().map(regexp -> + // regexp.getPattern()) + // .collect(Collectors.toList()); + + @JsonCreator + public Attribute(@JacksonXmlProperty(localName = "name", isAttribute = true) String name, + // @JacksonXmlElementWrapper(localName = "regexp-list") + @JacksonXmlProperty(localName = "regexp") List<Regexp> allowedRegexps, + // @JacksonXmlElementWrapper(localName = "literal-list") + @JacksonXmlProperty(localName = "literal") List<Literal> allowedValues, + @JacksonXmlProperty(localName = "onInvalid", isAttribute = true) String onInvalid, + @JacksonXmlProperty(localName = "description", isAttribute = true) String description) { + this.name = name; + this.description = description; + this.onInvalid = onInvalid; + this.regexpList = allowedRegexps; + this.literalList = allowedValues; + } + + @Override + public String toString() { + return "Attribute - name: " + name + ", description " + description + ", onInvalid " + onInvalid + + ", allowedRegexlist: " + + regexpList.size() + ", literals " + literalList; + } + + public String getOnInvalid() { + if (onInvalid != null && onInvalid.length() > 0) { + return onInvalid; + } else { + onInvalid = "removeAttribute"; + return onInvalid; + } + } + + public String getDescription() { + return description; + } + + public String getName() { + return name; + } + + public List<String> getLiterals() { + if (literalList != null && literalList.size() > 0) { Review Comment: It is generally better to return empty collections instead of null, because it makes using the result simpler. You can do this by setting the initial value to something like `new ArrayList<>()` or (better) `Collections.emptyList()`. Then you can skip the null checks. Also, the `size() > 0` or `isEmpty()` checks are not needed when all you do is iterate over the collection. ########## src/main/java/org/apache/sling/xss/impl/xml/Attribute.java: ########## @@ -0,0 +1,142 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Attribute { + + private String name = null; + private String description = null; + private String onInvalid = null; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList = null; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList = null; + + // private List<Pattern> patternList = regexpList.stream().map(regexp -> Review Comment: Please remove commented out code ########## src/main/java/org/apache/sling/xss/impl/xml/Property.java: ########## @@ -0,0 +1,123 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.Collections; +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Property { + private String name; + private String description; + private String defaultValue; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList; + + @JacksonXmlElementWrapper(localName = "category-list") + private List<Category> categoryList; + + @JacksonXmlElementWrapper(localName = "shorthand-list") + private List<Shorthand> shorthandList; + + private String onInvalid; + + @JsonCreator + public Property(@JacksonXmlProperty(localName = "name", isAttribute = true) String name, + @JacksonXmlProperty(localName = "regexp") List<Regexp> allowedRegexp3, + @JacksonXmlProperty(localName = "literal") List<Literal> allowedValue, + @JacksonXmlProperty(localName = "shorthand") List<Shorthand> shortHandRefs, + @JacksonXmlProperty(localName = "description", isAttribute = true) String description, + @JacksonXmlProperty(localName = "onInvalid", isAttribute = true) String onInvalidStr, + @JacksonXmlProperty(isAttribute = true, localName = "default") String defaultValue) { + + this.name = name; + this.description = description; + this.onInvalid = onInvalidStr; + this.regexpList = allowedRegexp3; + this.literalList = allowedValue; + this.shorthandList = shortHandRefs; + this.defaultValue = defaultValue; + } + + public List<Category> getCategoryList() { + return categoryList; + } + + public String getDefaultValue() { + return defaultValue; + } + + public String getDescription() { + return description; + } + + public List<Literal> getLiteralList() { + return literalList; + } + + public String getName() { + return name; + } + + public List<Regexp> getRegexpList() { + return regexpList; + } + + public List<Shorthand> getShorthandList() { + return shorthandList; + } + + public List<String> getShorthands() { + // reads out the shorthands and creats a list out of it Review Comment: Typo: creates ########## src/test/java/org/owasp/validator/html/XMLParser/PolicyTest.java: ########## @@ -0,0 +1,73 @@ +/******************************************************************************* + * Licensed to the Apache Software Foundation (ASF) under one or + * more contributor license agreements. See the NOTICE file + * distributed with this work for additional information regarding + * copyright ownership. The ASF licenses this file to you under the + * Apache License, Version 2.0 (the "License"); you may not use + * this file except in compliance with the License. You may obtain + * a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 Unless required by + * applicable law or agreed to in writing, software distributed + * under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions + * and limitations under the License. + ******************************************************************************/ +package org.owasp.validator.html.XMLParser; Review Comment: This test should be in the same package as the class that it tests ( but of course in the `src/test/java` folder ) ########## src/main/java/org/apache/sling/xss/impl/xml/Property.java: ########## @@ -0,0 +1,123 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.Collections; +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Property { + private String name; + private String description; + private String defaultValue; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList; + + @JacksonXmlElementWrapper(localName = "category-list") + private List<Category> categoryList; + + @JacksonXmlElementWrapper(localName = "shorthand-list") + private List<Shorthand> shorthandList; + + private String onInvalid; + + @JsonCreator + public Property(@JacksonXmlProperty(localName = "name", isAttribute = true) String name, + @JacksonXmlProperty(localName = "regexp") List<Regexp> allowedRegexp3, + @JacksonXmlProperty(localName = "literal") List<Literal> allowedValue, + @JacksonXmlProperty(localName = "shorthand") List<Shorthand> shortHandRefs, + @JacksonXmlProperty(localName = "description", isAttribute = true) String description, + @JacksonXmlProperty(localName = "onInvalid", isAttribute = true) String onInvalidStr, + @JacksonXmlProperty(isAttribute = true, localName = "default") String defaultValue) { + + this.name = name; + this.description = description; + this.onInvalid = onInvalidStr; + this.regexpList = allowedRegexp3; + this.literalList = allowedValue; + this.shorthandList = shortHandRefs; + this.defaultValue = defaultValue; + } + + public List<Category> getCategoryList() { + return categoryList; + } + + public String getDefaultValue() { + return defaultValue; + } + + public String getDescription() { + return description; + } + + public List<Literal> getLiteralList() { + return literalList; + } + + public String getName() { + return name; + } + + public List<Regexp> getRegexpList() { + return regexpList; + } + + public List<Shorthand> getShorthandList() { + return shorthandList; + } + + public List<String> getShorthands() { + // reads out the shorthands and creats a list out of it + + return shorthandList != null ? shorthandList.stream().map(shorthand -> shorthand.getName()) + .collect(Collectors.toList()) : Collections.emptyList(); + } + + public List<String> getLiterals() { + // reads out the literals and creats a list out of it Review Comment: Typo: creates ########## src/main/java/org/apache/sling/xss/impl/xml/Attribute.java: ########## @@ -0,0 +1,142 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Attribute { + + private String name = null; + private String description = null; + private String onInvalid = null; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList = null; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList = null; + + // private List<Pattern> patternList = regexpList.stream().map(regexp -> + // regexp.getPattern()) + // .collect(Collectors.toList()); + + @JsonCreator + public Attribute(@JacksonXmlProperty(localName = "name", isAttribute = true) String name, + // @JacksonXmlElementWrapper(localName = "regexp-list") + @JacksonXmlProperty(localName = "regexp") List<Regexp> allowedRegexps, + // @JacksonXmlElementWrapper(localName = "literal-list") + @JacksonXmlProperty(localName = "literal") List<Literal> allowedValues, + @JacksonXmlProperty(localName = "onInvalid", isAttribute = true) String onInvalid, + @JacksonXmlProperty(localName = "description", isAttribute = true) String description) { + this.name = name; + this.description = description; + this.onInvalid = onInvalid; + this.regexpList = allowedRegexps; + this.literalList = allowedValues; + } + + @Override + public String toString() { + return "Attribute - name: " + name + ", description " + description + ", onInvalid " + onInvalid + + ", allowedRegexlist: " + + regexpList.size() + ", literals " + literalList; + } + + public String getOnInvalid() { + if (onInvalid != null && onInvalid.length() > 0) { + return onInvalid; Review Comment: Looks like you want a default value where. Have you tried setting the initial value of the onInvalid field to "removeAttribute"? e.g. ```java private String onInvalid = "removeAttribute" ``` ########## src/main/java/org/apache/sling/xss/impl/xml/Attribute.java: ########## @@ -0,0 +1,142 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Attribute { + + private String name = null; + private String description = null; + private String onInvalid = null; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList = null; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList = null; + + // private List<Pattern> patternList = regexpList.stream().map(regexp -> + // regexp.getPattern()) + // .collect(Collectors.toList()); + + @JsonCreator + public Attribute(@JacksonXmlProperty(localName = "name", isAttribute = true) String name, + // @JacksonXmlElementWrapper(localName = "regexp-list") + @JacksonXmlProperty(localName = "regexp") List<Regexp> allowedRegexps, + // @JacksonXmlElementWrapper(localName = "literal-list") + @JacksonXmlProperty(localName = "literal") List<Literal> allowedValues, + @JacksonXmlProperty(localName = "onInvalid", isAttribute = true) String onInvalid, + @JacksonXmlProperty(localName = "description", isAttribute = true) String description) { + this.name = name; + this.description = description; + this.onInvalid = onInvalid; + this.regexpList = allowedRegexps; + this.literalList = allowedValues; + } + + @Override + public String toString() { + return "Attribute - name: " + name + ", description " + description + ", onInvalid " + onInvalid + + ", allowedRegexlist: " + + regexpList.size() + ", literals " + literalList; + } + + public String getOnInvalid() { + if (onInvalid != null && onInvalid.length() > 0) { + return onInvalid; + } else { + onInvalid = "removeAttribute"; + return onInvalid; + } + } + + public String getDescription() { + return description; + } + + public String getName() { + return name; + } + + public List<String> getLiterals() { + if (literalList != null && literalList.size() > 0) { + return literalList.stream().map(literal -> literal.getValue().toLowerCase()).collect(Collectors.toList()); + } + return null; + } + + public List<Literal> getLiteralList() { + return literalList; + } + + public List<Pattern> getPatternList() { + return regexpList.stream().map(regexp -> regexp.getPattern()) + .collect(Collectors.toList()); + + } + + public List<Regexp> getRegexpList() { + return regexpList; + } + + public boolean containsAllowedValue(String valueInLowerCase) { + List<String> literals = getLiterals(); + return literals != null && literals.size() > 0 ? getLiterals().contains(valueInLowerCase) : false; Review Comment: Remove once `getLiterals()` no longer returns null. ########## src/main/java/org/apache/sling/xss/impl/xml/Attribute.java: ########## @@ -0,0 +1,142 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Attribute { + + private String name = null; + private String description = null; + private String onInvalid = null; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList = null; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList = null; + + // private List<Pattern> patternList = regexpList.stream().map(regexp -> + // regexp.getPattern()) + // .collect(Collectors.toList()); + + @JsonCreator + public Attribute(@JacksonXmlProperty(localName = "name", isAttribute = true) String name, + // @JacksonXmlElementWrapper(localName = "regexp-list") + @JacksonXmlProperty(localName = "regexp") List<Regexp> allowedRegexps, + // @JacksonXmlElementWrapper(localName = "literal-list") + @JacksonXmlProperty(localName = "literal") List<Literal> allowedValues, + @JacksonXmlProperty(localName = "onInvalid", isAttribute = true) String onInvalid, + @JacksonXmlProperty(localName = "description", isAttribute = true) String description) { + this.name = name; + this.description = description; + this.onInvalid = onInvalid; + this.regexpList = allowedRegexps; + this.literalList = allowedValues; + } + + @Override + public String toString() { + return "Attribute - name: " + name + ", description " + description + ", onInvalid " + onInvalid + + ", allowedRegexlist: " + + regexpList.size() + ", literals " + literalList; + } + + public String getOnInvalid() { + if (onInvalid != null && onInvalid.length() > 0) { + return onInvalid; + } else { + onInvalid = "removeAttribute"; + return onInvalid; + } + } + + public String getDescription() { + return description; + } + + public String getName() { + return name; + } + + public List<String> getLiterals() { + if (literalList != null && literalList.size() > 0) { + return literalList.stream().map(literal -> literal.getValue().toLowerCase()).collect(Collectors.toList()); + } + return null; + } + + public List<Literal> getLiteralList() { + return literalList; + } + + public List<Pattern> getPatternList() { + return regexpList.stream().map(regexp -> regexp.getPattern()) + .collect(Collectors.toList()); + + } + + public List<Regexp> getRegexpList() { + return regexpList; + } + + public boolean containsAllowedValue(String valueInLowerCase) { + List<String> literals = getLiterals(); + return literals != null && literals.size() > 0 ? getLiterals().contains(valueInLowerCase) : false; + } + + public boolean matchesAllowedExpression(String value) { + if (regexpList != null && regexpList.size() > 0) { Review Comment: Make the `regexpList` default to an empty collection and you can drop this check. ########## src/main/java/org/apache/sling/xss/impl/xml/Property.java: ########## @@ -0,0 +1,123 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.util.Collections; +import java.util.List; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlElementWrapper; +import com.fasterxml.jackson.dataformat.xml.annotation.JacksonXmlProperty; + +public class Property { + private String name; + private String description; + private String defaultValue; + + @JacksonXmlElementWrapper(localName = "regexp-list") + private List<Regexp> regexpList; + + @JacksonXmlElementWrapper(localName = "literal-list") + private List<Literal> literalList; + + @JacksonXmlElementWrapper(localName = "category-list") + private List<Category> categoryList; + + @JacksonXmlElementWrapper(localName = "shorthand-list") + private List<Shorthand> shorthandList; + + private String onInvalid; + + @JsonCreator + public Property(@JacksonXmlProperty(localName = "name", isAttribute = true) String name, + @JacksonXmlProperty(localName = "regexp") List<Regexp> allowedRegexp3, + @JacksonXmlProperty(localName = "literal") List<Literal> allowedValue, + @JacksonXmlProperty(localName = "shorthand") List<Shorthand> shortHandRefs, + @JacksonXmlProperty(localName = "description", isAttribute = true) String description, + @JacksonXmlProperty(localName = "onInvalid", isAttribute = true) String onInvalidStr, + @JacksonXmlProperty(isAttribute = true, localName = "default") String defaultValue) { + + this.name = name; + this.description = description; + this.onInvalid = onInvalidStr; + this.regexpList = allowedRegexp3; + this.literalList = allowedValue; + this.shorthandList = shortHandRefs; + this.defaultValue = defaultValue; + } + + public List<Category> getCategoryList() { + return categoryList; + } + + public String getDefaultValue() { + return defaultValue; + } + + public String getDescription() { + return description; + } + + public List<Literal> getLiteralList() { + return literalList; + } + + public String getName() { + return name; + } + + public List<Regexp> getRegexpList() { + return regexpList; + } + + public List<Shorthand> getShorthandList() { + return shorthandList; + } + + public List<String> getShorthands() { + // reads out the shorthands and creats a list out of it + + return shorthandList != null ? shorthandList.stream().map(shorthand -> shorthand.getName()) + .collect(Collectors.toList()) : Collections.emptyList(); + } + + public List<String> getLiterals() { + // reads out the literals and creats a list out of it + return literalList.stream().map(literal -> literal.getValue()) + .collect(Collectors.toList()); + } + + public String getOnInvalid() { + if (onInvalid != null && onInvalid.length() > 0) { Review Comment: Looks like you want a default value where. Have you tried setting the initial value of the onInvalid field to "removeAttribute"? e.g. private String onInvalid = "removeAttribute" ########## src/main/java/org/apache/sling/xss/impl/AntiSamyHtmlSanitizer.java: ########## @@ -0,0 +1,95 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; + +import org.apache.sling.xss.impl.xml.Policy; +import org.owasp.html.DynamicAttributesSanitizerPolicy; +import org.owasp.html.Handler; +import org.owasp.html.HtmlSanitizer; +import org.owasp.html.HtmlStreamEventReceiver; +import org.owasp.html.HtmlStreamRenderer; +import org.owasp.html.PolicyFactory; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +public class AntiSamyHtmlSanitizer { + + public static final Object DOM = "DOM"; Review Comment: I don't think you need the `DOM` and `SAX` fields. ########## src/main/java/org/apache/sling/xss/impl/AntiSamyHtmlSanitizer.java: ########## @@ -0,0 +1,95 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; + +import org.apache.sling.xss.impl.xml.Policy; +import org.owasp.html.DynamicAttributesSanitizerPolicy; +import org.owasp.html.Handler; +import org.owasp.html.HtmlSanitizer; +import org.owasp.html.HtmlStreamEventReceiver; +import org.owasp.html.HtmlStreamRenderer; +import org.owasp.html.PolicyFactory; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; + +public class AntiSamyHtmlSanitizer { Review Comment: Do we need to mention AntiSamy here? We do read an AntiSamy config file and try to behave like that library, but probably just `HtmlSanitizer` is enough? ########## src/main/java/org/apache/sling/xss/impl/style/BatikCssCleaner.java: ########## @@ -0,0 +1,83 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.style; + +import java.io.IOException; +import java.io.StringReader; + +import org.apache.batik.css.parser.Parser; +import org.apache.sling.xss.impl.xml.Policy.CssPolicy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.w3c.css.sac.CSSException; +import org.w3c.css.sac.InputSource; + +public class BatikCssCleaner { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private static final String CDATA_PRE = "<![CDATA["; + private static final String CDATA_POST = "]]>"; + private final CssPolicy cssPolicy; + Review Comment: Nit: remove this extra blank line ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { Review Comment: I would argue that this is more like an AntiSamyBasedPolicyFactoryFactory :-) but that is a bad name. Still, I think that `CustomPolicy` is not the best way to express this. Let's first narrow down what we want this class to do. I see that it builds a (Html Sanitizer) PolicyFactory based on a parsed AntiSamy config file. Is that correct? ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": + policyBuilder.allowElements(tag.getValue().getName()); + + break; + // filter: remove tags, but keep content, + case "filter": + break; + // remove: remove tag and contents + case "remove": + policyBuilder.disallowElements(tag.getValue().getName()); + break; + + // validate is also the default + // validate: keep content as long as it passes rules, + default: + policyBuilder.allowElements(tag.getValue().getName()); + boolean styleSeen = false; + // get the allowed Attributes for the tag + Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap(); + if (allowedAttributes != null && allowedAttributes.size() > 0) { + + // if there are allowed Attributes, map over them + for (Attribute attribute : allowedAttributes.values()) { + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) + styleSeen = true; + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue) + .onElements(tag.getValue().getName()); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()) + .matching(matchesToPatterns(regexsFromAttribute)) + .onElements(tag.getValue().getName()); + } else { + policyBuilder.allowAttributes(attribute.getName()).onElements(tag.getValue().getName()); + } + } + + if (!styleSeen) { + policyBuilder.allowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME) + .matching(cssValidator.newCssAttributePolicy()).onElements(tag.getValue().getName()); + } + } + break; + } + } + + // disallow style tag on specific elements + policyBuilder.disallowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME) + .onElements(cssValidator.getDisallowedTagNames().toArray(new String[0])); + + // ---------- dynamic attributes ------------ + Map<String, Attribute> dynamicAttributes = new HashMap<>(); + // checks if the dynamic attributes are allowed + if (policy.getDirectives().get("allowDynamicAttributes").equals("true")) { + dynamicAttributes.putAll(policy.getDynamicAttributes()); + for (Attribute attribute : dynamicAttributes.values()) { + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + for (Pattern regex : regexsFromAttribute) { + dynamicAttributesPolicyMap.put(attribute.getName(), newDynamicAttributePolicy(regex)); + } + } + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + dynamicAttributesPolicyMap.put(attribute.getName(), + newDynamicAttributePolicy(true, allowedValuesFromAttribute.toArray(new String[0]))); + } + + } + } + + policyFactory = policyBuilder.allowTextIn(CssValidator.STYLE_TAG_NAME).toFactory(); + + } + + public PolicyFactory getCustomPolicyFactory() { + return policyFactory; + } + + public Map<String, AttributePolicy> getDynamicAttributesPolicyMap() { + return dynamicAttributesPolicyMap; + } + + public List<String> getOnInvalidRemoveTagList() { + return onInvalidRemoveTagList; + } + + public CssValidator getCssValidator() { + return cssValidator; + } + + private static Predicate<String> matchesToPatterns(List<Pattern> patternList) { + return new Predicate<String>() { + @Override + public boolean apply(String s) { + for (Pattern pattern : patternList) { + if (pattern.matcher(s).matches()) { + return true; + } + } + return false; + } + + @Override + public boolean test(String t) { Review Comment: This is a bit confusing. You should not override both `test` and `apply`. ########## src/main/java/org/apache/sling/xss/impl/HtmlToHtmlContentContext.java: ########## @@ -62,21 +58,17 @@ public boolean check(final PolicyHandler policyHandler, final String str) { * @see XSSFilterRule#filter(PolicyHandler, java.lang.String) */ @Override - public String filter(final PolicyHandler policyHandler, final String str) { - if (StringUtils.isNotEmpty(str)) { + public String filter(final PolicyHandler policyHandler, final String malicousString) { Review Comment: Typo: I think you meant malicious? I think it's better to use something like `unsafe` or `unsanitized` though, we are not sure it's malicious at this point. ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": Review Comment: We should extract these string values as constants somewhere and reference them from the code. ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { Review Comment: We should extract these string values as constants somewhere and reference them from the code. ########## src/main/java/org/apache/sling/xss/impl/xml/Policy.java: ########## @@ -0,0 +1,391 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import org.apache.sling.xss.impl.PolicyException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.ctc.wstx.stax.WstxInputFactory; +import com.ctc.wstx.stax.WstxOutputFactory; +import com.fasterxml.jackson.dataformat.xml.XmlMapper; + +public class Policy { Review Comment: I think we need to narrow down the responsibilities for this class, and then find a more communicative name. I think it 1. parses an AntiSamy config file 2. generates the `AntiSamyRules 3. builds some maps so the (current name) CustomPolicy can build the build the HTML Sanitiser Policy Factory Maybe a more appropriate name is AntiSamyRulesPolicy ? Also, we should only expose one public method for creating it ( whether it's a contructor or a `getInstance` static method and simplify the init code ). ########## src/main/java/org/apache/sling/xss/impl/xml/Policy.java: ########## @@ -0,0 +1,391 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl.xml; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + +import org.apache.sling.xss.impl.PolicyException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.ctc.wstx.stax.WstxInputFactory; +import com.ctc.wstx.stax.WstxOutputFactory; +import com.fasterxml.jackson.dataformat.xml.XmlMapper; + +public class Policy { + + private static final String DIRECTIVE_EMBED_STYLE_SHEETS = "embedStyleSheets"; + + public static class CssPolicy { + + private final Map<String, Property> cssRules; + private final IncludeExcludeMatcher elementMatcher; + private final IncludeExcludeMatcher classMatcher; + private final IncludeExcludeMatcher idMatcher; + private final IncludeExcludeMatcher pseudoElementMatcher; + private final IncludeExcludeMatcher attributeMatcher; + + public CssPolicy(Map<String, Property> cssrules, Map<String, Pattern> commonRegExps, Map<String, String> directives) { + this.cssRules = Collections.unmodifiableMap(cssrules); + this.elementMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssElementSelector"), + commonRegExps.get("cssElementExclusion")); + this.classMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssClassSelector"), + commonRegExps.get("cssClassExclusion")); + this.idMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssIDSelector"), + commonRegExps.get("cssIDExclusion")); + this.pseudoElementMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssPseudoElementSelector"), + commonRegExps.get("cssPseudoElementExclusion")); + this.attributeMatcher = new IncludeExcludeMatcher(commonRegExps.get("cssAttributeSelector"), + commonRegExps.get("cssAttributeExclusion")); + } + + public Map<String, Property> getCssRules() { + return cssRules; + } + + public boolean isValidElementName(String name) { + return elementMatcher.matches(name); + } + + public boolean isValidClassName(String name) { + return classMatcher.matches(name); + } + + public boolean isValidId(String name) { + return idMatcher.matches(name); + } + + public boolean isValidPseudoElementName(String name) { + return pseudoElementMatcher.matches(name); + } + + public boolean isValidAttributeSelector(String name) { + return attributeMatcher.matches(name); + } + } + + protected final Map<String, Pattern> commonRegularExpressions = new HashMap<>(); + protected final Map<String, Attribute> commonAttributes = new HashMap<>(); + protected final Map<String, Tag> tagRules = new HashMap<>(); + protected final Map<String, Property> cssRules = new HashMap<>(); + protected final Map<String, String> directives = new HashMap<>(); + protected final Map<String, Attribute> globalAttributes = new HashMap<>(); + protected final Map<String, Attribute> dynamicAttributes = new HashMap<>(); + protected List<String> allowedEmptyTags = new ArrayList<>(); + protected final List<String> requireClosingTags = new ArrayList<>(); + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + public Map<String, String> getDirectives() { + return directives; + } + + public List<String> getRequireClosingTags() { + return requireClosingTags; + } + + public Map<String, Pattern> getCommonRegularExpressions() { + return commonRegularExpressions; + } + + public Map<String, Attribute> getGlobalAttributes() { + return globalAttributes; + } + + public Map<String, Attribute> getCommonAttributes() { + return commonAttributes; + } + + public Map<String, Property> getCssRules() { + return cssRules; + } + + public List<String> getAllowedEmptyTags() { + return allowedEmptyTags; + } + + public Map<String, Tag> getTagRules() { + return tagRules; + } + + public Map<String, Attribute> getDynamicAttributes() { + return dynamicAttributes; + } + + public CssPolicy getCssPolicy() { + + return new CssPolicy(cssRules, + commonRegularExpressions, directives); + } + + protected Policy(InputStream input) throws PolicyException, XMLStreamException, IOException { + AntiSamyRules root = null; + root = getTopLevelElement(input); + init(root); + } + + public static Policy getInstance(InputStream bais) throws PolicyException, XMLStreamException, IOException { + return new Policy(bais); + } + + public Tag getTagByLowercaseName(String a) { + return tagRules.get(a); + } + + public AntiSamyRules getTopLevelElement(InputStream input) + throws IOException, XMLStreamException { + XMLInputFactory xmlInputFactory = new WstxInputFactory(); + XMLStreamReader xmlStreamReader; + AntiSamyRules antiSamyRules = null; + xmlStreamReader = xmlInputFactory.createXMLStreamReader(input); + XmlMapper mapper = new XmlMapper(xmlInputFactory, new WstxOutputFactory()); + antiSamyRules = mapper.readValue(xmlStreamReader, AntiSamyRules.class); + if ( "true".equals(antiSamyRules.getDirectivesByName().get(DIRECTIVE_EMBED_STYLE_SHEETS)) ) { + logger.warn("Unsupported configuration directive {} is set to true and will be ignored", DIRECTIVE_EMBED_STYLE_SHEETS); + } + return antiSamyRules; + } + + private void init(AntiSamyRules topLevelElement) throws PolicyException { + parseCommonRegExps(topLevelElement.getRegexpList()); + parseDirectives(topLevelElement.getDirectiveList()); + parseCommonAttributes(topLevelElement.getCommonAttributeList()); + parseGlobalAttributes(topLevelElement.getGlobalTagAttributes().getGlobalTagAttributeList()); + parseDynamicAttributes(topLevelElement.getDynamicTagAttribute().getDynamicTagAttributeList()); + parseTagRules(topLevelElement.getTagRulesList()); + parseCSSRules(topLevelElement.getPropertyList()); + + parseAllowedEmptyTags(topLevelElement.getAllowedEmptyTags()); + } + + /** + * Go through the <common-regexps> section of the policy file. + * + * @param root Top level of <common-regexps> + * @param commonRegularExpressions2 the antisamy regexp objects + */ + private void parseCommonRegExps(List<Regexp> root) { + for (Regexp regex : root) { + String name = regex.getName(); + Pattern regexp = Pattern.compile(regex.getValue(), + Pattern.DOTALL); + commonRegularExpressions.put(name, regexp); + } + } + + /** + * Go through <directives> section of the policy file. + * + * @param root Top level of <directives> + * @param directives The directives map to update + */ + private void parseDirectives(List<Directive> root) { + for (Directive directive : root) { + String name = directive.getName(); + String value = directive.getValue(); + directives.put(name, value); + } + } + + private void parseCommonAttributes(List<Attribute> root) { + + for (Attribute attribute : root) { + + List<Regexp> allowedRegexps = getAllowedRegexps(attribute.getRegexpList()); + Attribute newAttribute = new Attribute(attribute.getName(), allowedRegexps, attribute.getLiteralList(), + attribute.getOnInvalid(), attribute.getDescription()); + commonAttributes.put(attribute.getName().toLowerCase(), newAttribute); + } + + } + + // /** + // * Go through <allowed-empty-tags> section of the policy file. + // * + // * @param allowedEmptyTagsListNode Top level of <allowed-empty-tags> + // * @param allowedEmptyTags The tags that can be empty + // */ + private void parseAllowedEmptyTags(AllowedEmptyTags allowedEmptyTagsList) throws PolicyException { + if (allowedEmptyTagsList != null) { + allowedEmptyTags = allowedEmptyTagsList.getLiterals(); + } else + allowedEmptyTags.addAll(Arrays.asList( + "br", "hr", "a", "img", "link", "iframe", "script", "object", "applet", + "frame", "base", "param", "meta", "input", "textarea", "embed", + "basefont", "col")); + } + + // /** + // * Go through <global-tag-attributes> section of the policy file. + // * + // * @param root Top level of <global-tag-attributes> + // * @param globalAttributes1 A HashMap of global Attributes that need + // validation + // * for every tag. + // * @param commonAttributes The common attributes + // * @throws PolicyException + // */ + private void parseGlobalAttributes(List<Attribute> root) throws PolicyException { + for (Attribute ele : root) { + + String name = ele.getName(); + Attribute toAdd = commonAttributes.get(name.toLowerCase()); + + if (toAdd != null) + globalAttributes.put(name.toLowerCase(), toAdd); + else + throw new PolicyException("Global attribute '" + name + + "' was not defined in <common-attributes>"); + } + } + + // /** + // * Go through <dynamic-tag-attributes> section of the policy file. + // * + // * @param root Top level of <dynamic-tag-attributes> + // * @param dynamicAttributes A HashMap of dynamic Attributes that need + // validation + // * for every tag. + // * @param commonAttributes The common attributes + // * @throws PolicyException + // */ + + private void parseDynamicAttributes(List<Attribute> root) throws PolicyException { + for (Attribute ele : root) { + + String name = ele.getName(); + Attribute toAdd = commonAttributes.get(name.toLowerCase()); + + if (toAdd != null) { + String attrName = name.toLowerCase().substring(0, name.length() - 1); + dynamicAttributes.put(attrName, toAdd); + } else + throw new PolicyException("Dynamic attribute '" + name + + "' was not defined in <common-attributes>"); + } + } + + private List<Regexp> getAllowedRegexps(List<Regexp> nameAndRegexpsList) { + List<Regexp> allowedRegExp = new ArrayList<>(); + if (nameAndRegexpsList != null && nameAndRegexpsList.size() > 0) { + for (Regexp regExpNode : nameAndRegexpsList) { + String regExpName = regExpNode.getName(); + String value = regExpNode.getValue(); + + if (regExpName != null && regExpName.length() > 0) { + allowedRegExp + .add(new Regexp(regExpNode.getName(), + commonRegularExpressions.get(regExpName).toString())); + } else if (value != null) { + allowedRegExp.add(new Regexp(regExpName, value)); + } + } + } + return allowedRegExp; + } + + private void parseTagRules(List<Tag> root) throws PolicyException { + if (root == null) + return; + + for (Tag tagNode : root) { + String name = tagNode.getName(); + String action = tagNode.getAction(); + + List<Attribute> attributeList = tagNode.getAttributeList(); + List<Attribute> tagAttributes = getTagAttributes(attributeList, name); + Tag tag = new Tag(name, action, tagAttributes); + + tagRules.put(name.toLowerCase(), tag); + } + } + + private List<Attribute> getTagAttributes(List<Attribute> attributeList, String tagName) + throws PolicyException { + + List<Attribute> tagAttributes = new ArrayList<>(); + if (attributeList != null && attributeList.size() > 0) { + for (Attribute attribute : attributeList) { + Attribute newAttribute; + String attributeName = attribute.getName().toLowerCase(); + List<Regexp> regexps = attribute.getRegexpList(); + List<Literal> literals = attribute.getLiteralList(); + String onInvalid = attribute.getOnInvalid(); + String description = attribute.getDescription(); + + // attribute has no children + if (regexps == null && literals == null) { + + Attribute commonAttribute = commonAttributes.get(attributeName); + if (commonAttribute != null) { + // creates a new Attribut with the fetched Attribute informations if not + // available + newAttribute = new Attribute(attributeName, + regexps != null && regexps.size() != 0 ? regexps : commonAttribute.getRegexpList(), Review Comment: Please use empty collections as defaults to avoid these null checks. ########## src/main/java/org/apache/sling/xss/impl/XSSAPIImpl.java: ########## @@ -351,14 +351,18 @@ public String getValidXML(String xml, String defaultXml) { return ""; } + ClassLoader tccl = Thread.currentThread().getContextClassLoader(); Review Comment: Why did you add these lines? ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": + policyBuilder.allowElements(tag.getValue().getName()); + + break; + // filter: remove tags, but keep content, + case "filter": + break; + // remove: remove tag and contents + case "remove": Review Comment: We should extract these string values as constants somewhere and reference them from the code. ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": + policyBuilder.allowElements(tag.getValue().getName()); + + break; + // filter: remove tags, but keep content, + case "filter": Review Comment: We should extract these string values as constants somewhere and reference them from the code. ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": + policyBuilder.allowElements(tag.getValue().getName()); + + break; + // filter: remove tags, but keep content, + case "filter": + break; + // remove: remove tag and contents + case "remove": + policyBuilder.disallowElements(tag.getValue().getName()); + break; + + // validate is also the default + // validate: keep content as long as it passes rules, + default: + policyBuilder.allowElements(tag.getValue().getName()); + boolean styleSeen = false; + // get the allowed Attributes for the tag + Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap(); + if (allowedAttributes != null && allowedAttributes.size() > 0) { + + // if there are allowed Attributes, map over them + for (Attribute attribute : allowedAttributes.values()) { + if (attribute.getOnInvalid().equals("removeTag")) { Review Comment: We should extract these string values as constants somewhere and reference them from the code. ########## src/main/java/org/apache/sling/xss/impl/CustomPolicy.java: ########## @@ -0,0 +1,265 @@ +/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~*/ +package org.apache.sling.xss.impl; + +import java.lang.reflect.Field; +import java.lang.reflect.Modifier; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +import javax.annotation.Nullable; + +import org.apache.sling.xss.impl.style.CssValidator; +import org.apache.sling.xss.impl.xml.Attribute; +import org.apache.sling.xss.impl.xml.Policy; +import org.apache.sling.xss.impl.xml.Tag; +import org.owasp.html.AttributePolicy; +import org.owasp.html.HtmlPolicyBuilder; +import org.owasp.html.PolicyFactory; + +import com.google.common.base.Predicate; +import com.google.common.collect.ImmutableSet; + +public class CustomPolicy { + private PolicyFactory policyFactory; + private List<String> onInvalidRemoveTagList = new ArrayList<>(); + private Map<String, AttributePolicy> dynamicAttributesPolicyMap = new HashMap<>(); + private CssValidator cssValidator; + + public CustomPolicy(Policy policy) { + removeAttributeGuards(); + HtmlPolicyBuilder policyBuilder = new HtmlPolicyBuilder(); + + cssValidator = new CssValidator(policy.getCssPolicy()); + + // ------------ this is for the global attributes ------------- + Map<String, Attribute> globalAttributes = policy.getGlobalAttributes(); + for (Attribute attribute : globalAttributes.values()) { + + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) { + // we match style tags separately + policyBuilder.allowAttributes(attribute.getName()).matching(cssValidator.newCssAttributePolicy()) + .globally(); + } else { + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue).globally(); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()).matching(matchesToPatterns(regexsFromAttribute)) + .globally(); + } else { + policyBuilder.allowAttributes(attribute.getName()).globally(); + } + + } + } + + // ------------ this is for the allowed emty tags ------------- + List<String> allowedEmptyTags = policy.getAllowedEmptyTags(); + for (String allowedEmptyTag : allowedEmptyTags) { + policyBuilder.allowWithoutAttributes(allowedEmptyTag); + } + + // ------------ this is for the tag rules ------------- + Map<String, Tag> tagMap = policy.getTagRules(); + for (Map.Entry<String, Tag> tag : tagMap.entrySet()) { + + String tagAction = tag.getValue().getAction(); + switch (tagAction) { + // Tag.action + case "truncate": + policyBuilder.allowElements(tag.getValue().getName()); + + break; + // filter: remove tags, but keep content, + case "filter": + break; + // remove: remove tag and contents + case "remove": + policyBuilder.disallowElements(tag.getValue().getName()); + break; + + // validate is also the default + // validate: keep content as long as it passes rules, + default: + policyBuilder.allowElements(tag.getValue().getName()); + boolean styleSeen = false; + // get the allowed Attributes for the tag + Map<String, Attribute> allowedAttributes = tag.getValue().getAttributeMap(); + if (allowedAttributes != null && allowedAttributes.size() > 0) { + + // if there are allowed Attributes, map over them + for (Attribute attribute : allowedAttributes.values()) { + if (attribute.getOnInvalid().equals("removeTag")) { + onInvalidRemoveTagList.add(attribute.getName()); + } + if (CssValidator.STYLE_ATTRIBUTE_NAME.equals(attribute.getName())) + styleSeen = true; + List<String> allowedValuesFromAttribute = attribute.getLiterals(); + if (allowedValuesFromAttribute != null && allowedValuesFromAttribute.size() > 0) { + for (String allowedValue : allowedValuesFromAttribute) { + policyBuilder.allowAttributes(attribute.getName()).matching(true, allowedValue) + .onElements(tag.getValue().getName()); + } + + } + List<Pattern> regexsFromAttribute = attribute.getPatternList(); + if (regexsFromAttribute != null && regexsFromAttribute.size() > 0) { + policyBuilder.allowAttributes(attribute.getName()) + .matching(matchesToPatterns(regexsFromAttribute)) + .onElements(tag.getValue().getName()); + } else { + policyBuilder.allowAttributes(attribute.getName()).onElements(tag.getValue().getName()); + } + } + + if (!styleSeen) { + policyBuilder.allowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME) + .matching(cssValidator.newCssAttributePolicy()).onElements(tag.getValue().getName()); + } + } + break; + } + } + + // disallow style tag on specific elements + policyBuilder.disallowAttributes(CssValidator.STYLE_ATTRIBUTE_NAME) + .onElements(cssValidator.getDisallowedTagNames().toArray(new String[0])); + + // ---------- dynamic attributes ------------ + Map<String, Attribute> dynamicAttributes = new HashMap<>(); + // checks if the dynamic attributes are allowed + if (policy.getDirectives().get("allowDynamicAttributes").equals("true")) { Review Comment: We should extract these string values as constants somewhere and reference them from the code. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
