Hello, I'm doing the same thing, and I wrote a new sanitizer class that accepts more tags and parameters. I run this sanitizer before and after calling setHtml() and getHtml() on the RichTextArea. I have attached the file; if you find any issues, please let me know.
Patrice -- You received this message because you are subscribed to the Google Groups "Google Web Toolkit" group. To post to this group, send email to [email protected]. To unsubscribe from this group, send email to [email protected]. For more options, visit this group at http://groups.google.com/group/google-web-toolkit?hl=en.
package cef.kephas.client.utils;

/**
 * Copyright (c) 2009 Open Lab, http://www.open-lab.com/
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * This class was adapted to GWT by Patrice de Saint St?ban
 */

import java.util.ArrayList;
import java.util.List;
import java.util.Stack;

import com.google.gwt.regexp.shared.MatchResult;
import com.google.gwt.regexp.shared.RegExp;
import com.google.gwt.safehtml.shared.HtmlSanitizer;
import com.google.gwt.safehtml.shared.SafeHtml;
import com.google.gwt.safehtml.shared.SafeHtmlBuilder;
import com.google.gwt.safehtml.shared.SafeHtmlUtils;

/**
 * Tag/attribute filtering {@link HtmlSanitizer}.
 *
 * <p>The input is split into tokens (comments, {@code <...>} chunks and text).
 * Tags are re-emitted from scratch only when their name matches
 * {@link #allowedTags} and not {@link #forbiddenTags}; every attribute is
 * either validated, encoded or dropped. Text (and anything that does not parse
 * as a tag) is emitted with quotes and angle brackets encoded. Tags still open
 * at the end of the input are closed.
 *
 * <p>An instance keeps the results of its <em>last</em> run (sanitized html,
 * encoded text, rejected fragments, validity flag) in fields, and the static
 * entry point {@link #sanitizeHtml(String)} shares one instance, so a run
 * overwrites the results of the previous one.
 */
public class ComplexeHtmlSanitizer implements HtmlSanitizer {

    /** Tag names that are always dropped (checked before {@link #allowedTags}). */
    public static RegExp forbiddenTags = RegExp
            .compile("^(script|object|embed|link|style|form|input|iframe)$");

    /** Tag names that may be re-emitted once their attributes are cleaned. */
    public static RegExp allowedTags = RegExp
            .compile("^(b|p|i|s|a|img|table|thead|tbody|tfoot|tr|th|td|dd|dl|dt|em|h1|h2|h3|h4|h5|h6|li|ul|ol|span|div|strike|strong|"
                    + "sub|sup|pre|del|code|blockquote|strike|kbd|br|hr|area|map|object|embed|param|link|form|small|big|font)$");

    /** expression(....) -- thanks to Ben Summer. Global flag: reset lastIndex before test(). */
    public static RegExp forbiddenStyleRegExp = RegExp.compile(
            "(expression|eval|javascript)\\s*\\(", "g");

    /** Start of a comment: <!--.........> */
    private static RegExp commentRegExp = RegExp.compile("<!--.*");

    /** Opening tag: <tag ....props.....> */
    private static RegExp tagStartRegExp = RegExp
            .compile("<(\\w+\\b)\\s*(.*)/?>$");

    /** Closing tag: </tag .........> */
    private static RegExp tagCloseRegExp = RegExp.compile("</(\\w+\\b)\\s*>$");

    /** Tags that are never pushed on the open-tag stack. */
    private static RegExp standAloneTags = RegExp.compile("^(img|br|hr)$");

    /** Attribute: prop="...." (double-quoted values only). */
    private static RegExp attributesRegExp = RegExp.compile(
            "(\\w*)\\s*=\\s*\"([^\"]*)\"", "g");

    /** Style declaration: color:red; */
    private static RegExp styleRegExp = RegExp.compile(
            "([^\\s^:]+)\\s*:\\s*([^;]+);?", "g");

    /** Quoted url inside a style value: url('....') */
    private static RegExp urlStyleRegExp = RegExp.compile(
            ".*\\b\\s*url\\s*\\(['\"]([^)]*)['\"]\\)", "g");

    /** Schemes accepted for href / src / style urls. */
    private static final String[] URL_SCHEMES = { "http", "https" };

    private static ComplexeHtmlSanitizer instance = new ComplexeHtmlSanitizer();

    /*
     * Results of the last sanitizing run.
     * safeHtmlBuilder: the sanitized html; rejected tags are dropped or
     *   encoded, text is always encoded. MUST BE USED WHEN PRINTING HTML.
     * textBuilder: the encoded text tokens with line feeds replaced by
     *   spaces. SHOULD BE USED TO PRINT EXCERPTS.
     * isValid: true when nothing was recorded in invalidTags.
     * invalidTags: the rejected tags / attributes / comments.
     */
    private SafeHtmlBuilder safeHtmlBuilder = new SafeHtmlBuilder();
    private StringBuilder textBuilder = new StringBuilder();
    private boolean isValid = true;
    private List<String> invalidTags = new ArrayList<String>();

    /** @return the shared instance also used by {@link #sanitizeHtml(String)} */
    public static ComplexeHtmlSanitizer getInstance() {
        return instance;
    }

    /**
     * Used to clean every html before outputting it in any html page.
     * Replaces the results held by this instance.
     *
     * @param html the untrusted html, may be null or empty
     * @return sanitized html
     */
    @Override
    public SafeHtml sanitize(String html) {
        _sanitizeHtml(html);
        return this.safeHtmlBuilder.toSafeHtml();
    }

    /**
     * Sanitizes with the shared instance.
     *
     * @param html the untrusted html, may be null or empty
     * @return sanitized html
     */
    public static SafeHtml sanitizeHtml(String html) {
        return instance.sanitize(html);
    }

    /**
     * Runs the sanitizer and stores the outcome in the instance fields.
     * All result fields are reset first, so rejected fragments of an earlier
     * run cannot leak into this one (the original kept appending to
     * invalidTags, which left the shared instance permanently "invalid").
     */
    private void _sanitizeHtml(String html) {
        safeHtmlBuilder = new SafeHtmlBuilder();
        textBuilder = new StringBuilder();
        // A fresh list, so a list handed out earlier by getInvalidTags() is not mutated.
        invalidTags = new ArrayList<String>();
        isValid = true;

        if (html == null || html.isEmpty()) {
            safeHtmlBuilder.append(' ');
            return;
        }

        Stack<String> openTags = new Stack<String>();

        for (String token : tokenize(html)) {
            // COMMENT <!-- ......... --> : dropped from the output
            if (commentRegExp.test(token)) {
                invalidTags.add(token + (token.endsWith("-->") ? "" : "-->"));
                continue;
            }

            // OPEN TAG <tag .........>
            MatchResult startMatcher = tagStartRegExp.exec(token);
            if (startMatcher != null) {
                String cleanTag = sanitizeOpenTag(startMatcher, token, openTags);
                if (cleanTag != null) {
                    safeHtmlBuilder.appendHtmlConstant(cleanTag);
                }
                continue;
            }

            // CLOSE TAG </tag>
            MatchResult endMatcher = tagCloseRegExp.exec(token);
            if (endMatcher != null) {
                String closing = sanitizeCloseTag(endMatcher, token, openTags);
                if (closing != null) {
                    safeHtmlBuilder.appendHtmlConstant(closing);
                }
                continue;
            }

            // TEXT, or a "<...>" chunk that is not a recognisable tag: encode it
            String sanToken = htmlEncodeApexesAndTags(token);
            safeHtmlBuilder.append(SafeHtmlUtils.fromSafeConstant(sanToken));
            textBuilder.append(htmlEncodeApexesAndTags(removeLineFeed(token)));
        }

        // must close remaining tags
        while (!openTags.isEmpty()) {
            safeHtmlBuilder.appendHtmlConstant("</" + openTags.pop() + ">");
        }

        isValid = invalidTags.isEmpty();
    }

    /**
     * Rebuilds an opening tag from its name and cleaned attributes.
     *
     * @return the clean tag to emit, or null when the tag is dropped
     */
    private String sanitizeOpenTag(MatchResult startMatcher, String token,
            Stack<String> openTags) {
        String tag = startMatcher.getGroup(1).toLowerCase();

        // FORBIDDEN TAG <script .........>
        if (forbiddenTags.test(tag)) {
            invalidTags.add("<" + tag + ">");
            return null;
        }
        // UNKNOWN TAG
        if (!allowedTags.test(tag)) {
            invalidTags.add(token);
            return null;
        }

        // table consistency: sections/rows need an open table, cells an open row
        boolean isTableSection = "thead".equals(tag) || "tbody".equals(tag)
                || "tfoot".equals(tag) || "tr".equals(tag);
        boolean isTableCell = "td".equals(tag) || "th".equals(tag);
        if ((isTableSection && openTags.search("table") < 1)
                || (isTableCell && openTags.search("tr") < 1)) {
            invalidTags.add("<" + tag + ">");
            return null;
        }

        boolean needsUrl = "a".equals(tag) || "img".equals(tag)
                || "embed".equals(tag);
        boolean foundURL = false;
        String tokenBody = startMatcher.getGroup(2);
        StringBuilder cleanToken = new StringBuilder("<").append(tag);

        // Global regexp: start from the beginning of this token body.
        attributesRegExp.setLastIndex(0);
        for (MatchResult attributes = attributesRegExp.exec(tokenBody);
                attributes != null;
                attributes = attributesRegExp.exec(tokenBody)) {
            String attr = attributes.getGroup(1).toLowerCase();
            String val = attributes.getGroup(2);

            if ("a".equals(tag) && "href".equals(attr)) {
                // <a href="......"> : http(s) url or mailto
                if (isValidHttpUrl(val) || isAcceptableMailto(val)) {
                    foundURL = true;
                } else {
                    invalidTags.add(attr + " " + val);
                    val = "";
                }
            } else if (("img".equals(tag) || "embed".equals(tag))
                    && "src".equals(attr)) {
                // <img src="......">
                if (isValidHttpUrl(val)) {
                    foundURL = true;
                } else {
                    invalidTags.add(attr + " " + val);
                    val = "";
                }
            } else if ("href".equals(attr) || "src".equals(attr)) {
                // src/href on any other tag: attribute skipped
                invalidTags.add(tag + " " + attr + " " + val);
                continue;
            } else if ("width".equals(attr) || "height".equals(attr)) {
                // only plain numbers or percentages
                if (!val.toLowerCase().matches("\\d+%|\\d+$")) {
                    invalidTags.add(tag + " " + attr + " " + val);
                    continue;
                }
            } else if ("style".equals(attr)) {
                val = sanitizeStyle(tag, attr, val);
            } else if (attr.startsWith("on")) {
                // skip all JavaScript events
                invalidTags.add(tag + " " + attr + " " + val);
                continue;
            } else {
                // by default encode all properties
                val = encode(val);
            }

            cleanToken.append(' ').append(attr).append("=\"").append(val)
                    .append('"');
        }
        cleanToken.append('>');

        // <a>, <img> are useless (and dropped) without a valid url
        if (needsUrl && !foundURL) {
            return null;
        }

        // Every accepted tag except img/br/hr requires closure. A "<tag/>"
        // is re-emitted as "<tag>", so it is pushed and closed later as well.
        if (!standAloneTags.test(tag)) {
            openTags.push(tag);
        }
        return cleanToken.toString();
    }

    /**
     * Handles a closing tag: closes every tag opened after (and including) the
     * matching open tag.
     *
     * @return the closing markup to emit, or null when nothing is emitted
     */
    private String sanitizeCloseTag(MatchResult endMatcher, String token,
            Stack<String> openTags) {
        String tag = endMatcher.getGroup(1).toLowerCase();

        if (forbiddenTags.test(tag)) {
            invalidTags.add("/" + tag);
            return null;
        }
        if (!allowedTags.test(tag)) {
            invalidTags.add(token);
            return null;
        }

        // 1-based distance from the top of the stack, -1 when never opened
        int pos = openTags.search(tag);
        if (pos < 1) {
            return null;
        }
        StringBuilder closing = new StringBuilder();
        for (int i = 1; i <= pos; i++) {
            closing.append("</").append(openTags.pop()).append('>');
        }
        return closing.toString();
    }

    /**
     * Keeps only the style declarations whose value is neither a forbidden
     * script-like call nor a quoted url() that fails http(s) validation.
     * Rejected declarations are recorded and skipped; the loop always advances
     * to the next declaration (the original spun forever on the first
     * rejected one).
     */
    private String sanitizeStyle(String tag, String attr, String val) {
        StringBuilder cleanStyle = new StringBuilder();

        styleRegExp.setLastIndex(0);
        for (MatchResult styles = styleRegExp.exec(val); styles != null;
                styles = styleRegExp.exec(val)) {
            String styleName = styles.getGroup(1).toLowerCase();
            String styleValue = styles.getGroup(2);

            // Both regexps are global and keep lastIndex after a match, so
            // they must be rewound before inspecting an unrelated string.
            forbiddenStyleRegExp.setLastIndex(0);
            if (forbiddenStyleRegExp.test(styleValue)) {
                invalidTags.add(tag + " " + attr + " " + styleValue);
                continue;
            }

            urlStyleRegExp.setLastIndex(0);
            MatchResult urlStyleMatcher = urlStyleRegExp.exec(styleValue);
            if (urlStyleMatcher != null
                    && !isValidHttpUrl(urlStyleMatcher.getGroup(1))) {
                invalidTags.add(tag + " " + attr + " " + styleValue);
                continue;
            }

            cleanStyle.append(styleName).append(':')
                    .append(encode(styleValue)).append(';');
        }
        return cleanStyle.toString();
    }

    /** @return true when UrlValidator accepts the url for the http/https schemes */
    private static boolean isValidHttpUrl(String url) {
        return new UrlValidator(URL_SCHEMES).isValid(url);
    }

    /**
     * Case {@code href="mailto:user@host?subject=...."}: accepted when
     * "http://www." followed by everything after the first '@' validates as
     * an http url.
     */
    private static boolean isAcceptableMailto(String val) {
        int at = val.indexOf("@");
        if (!val.toLowerCase().startsWith("mailto:") || at < 0) {
            return false;
        }
        return isValidHttpUrl("http://www." + val.substring(at + 1));
    }

    /**
     * Splits html into comment tokens, {@code <......>} tokens and text tokens.
     * An unterminated comment or tag extends to the end of the input.
     *
     * @param html the text to split, not null
     * @return a list of tokens, in input order
     */
    private static List<String> tokenize(String html) {
        List<String> tokens = new ArrayList<String>();
        StringBuilder text = new StringBuilder();
        int len = html.length();
        int pos = 0;

        while (pos < len) {
            char c = html.charAt(pos);
            if (c != '<') {
                text.append(c);
                pos++;
                continue;
            }

            // a markup token is starting: store the pending text first
            if (text.length() > 0) {
                tokens.add(text.toString());
                text = new StringBuilder();
            }
            String marker = html.startsWith("<!--", pos) ? "-->" : ">";
            int end = moveToMarkerEnd(pos, marker, html);
            tokens.add(html.substring(pos, end));
            pos = end;
        }

        // store the last token
        if (text.length() > 0) {
            tokens.add(text.toString());
        }
        return tokens;
    }

    /**
     * @return the index just past the first occurrence of marker at or after
     *         pos, or the length of s when marker does not occur
     */
    private static int moveToMarkerEnd(int pos, String marker, String s) {
        int i = s.indexOf(marker, pos);
        return i > -1 ? i + marker.length() : s.length();
    }

    /** @return the sanitized html of the last run (or the html set explicitly) */
    public SafeHtml getHtml() {
        return safeHtmlBuilder.toSafeHtml();
    }

    /** Replaces the held html with an already safe value; nothing is sanitized. */
    public void setHtml(SafeHtml html) {
        this.safeHtmlBuilder = new SafeHtmlBuilder().append(html);
    }

    /** Sanitizes html and stores the results in this instance. */
    public void setHtml(String html) {
        _sanitizeHtml(html);
    }

    /** @return the encoded text tokens collected by the last run */
    public String getText() {
        return textBuilder.toString();
    }

    public void setText(StringBuilder text) {
        this.textBuilder = text;
    }

    /** @return true when the last run rejected nothing */
    public boolean isValid() {
        return isValid;
    }

    public void setValid(boolean isValid) {
        this.isValid = isValid;
    }

    /** @return the fragments rejected by the last run */
    public List<String> getInvalidTags() {
        return invalidTags;
    }

    public void setInvalidTags(List<String> invalidTags) {
        this.invalidTags = invalidTags;
    }

    /**
     * Encodes quotes and angle brackets, then converts line feeds to
     * {@code <br/>}. A null argument is treated as the empty string.
     */
    public static String encode(String s) {
        return convertLineFeedToBR(htmlEncodeApexesAndTags(s == null ? "" : s));
    }

    /** Encodes double quotes, single quotes and angle brackets as entities. */
    public static final String htmlEncodeApexesAndTags(String source) {
        return htmlEncodeTag(htmlEncodeApexes(source));
    }

    /**
     * Replaces {@code "} with {@code &quot;} and {@code '} with {@code &#39;}.
     *
     * @return the encoded string, or null when source is null
     */
    public static final String htmlEncodeApexes(String source) {
        if (source == null) {
            return null;
        }
        return replaceAllNoRegex(source, new String[] { "\"", "'" },
                new String[] { "&quot;", "&#39;" });
    }

    /**
     * Replaces {@code <} with {@code &lt;} and {@code >} with {@code &gt;}.
     *
     * @return the encoded string, or null when source is null
     */
    public static final String htmlEncodeTag(String source) {
        if (source == null) {
            return null;
        }
        return replaceAllNoRegex(source, new String[] { "<", ">" },
                new String[] { "&lt;", "&gt;" });
    }

    /**
     * Replaces line feeds and form feeds with {@code <br/>} and carriage
     * returns with a space.
     *
     * @return the converted text, or null when text is null
     */
    public static String convertLineFeedToBR(String text) {
        if (text == null) {
            return null;
        }
        return replaceAllNoRegex(text, new String[] { "\n", "\f", "\r" },
                new String[] { "<br/>", "<br/>", " " });
    }

    /**
     * Replaces line feeds, form feeds and carriage returns with a space.
     *
     * @return the converted text, or null when text is null
     */
    public static String removeLineFeed(String text) {
        if (text == null) {
            return null;
        }
        return replaceAllNoRegex(text, new String[] { "\n", "\f", "\r" },
                new String[] { " ", " ", " " });
    }

    /**
     * Applies the literal replacements searches[k] -> replaces[k] one after
     * the other, each on the result of the previous one.
     */
    public static final String replaceAllNoRegex(String source,
            String searches[], String replaces[]) {
        String tmp = source;
        for (int k = 0; k < searches.length; k++) {
            tmp = replaceAllNoRegex(tmp, searches[k], replaces[k]);
        }
        return tmp;
    }

    /**
     * Replaces every occurrence of the literal search string.
     *
     * @return the replaced string; source itself when search is empty; the
     *         empty string when source is null
     */
    public static final String replaceAllNoRegex(String source, String search,
            String replace) {
        if (source == null) {
            return "";
        }
        if (search.length() == 0) {
            return source;
        }
        StringBuilder buffer = new StringBuilder();
        int oldPos = 0;
        for (int pos = source.indexOf(search, oldPos); pos != -1;
                pos = source.indexOf(search, oldPos)) {
            buffer.append(source.substring(oldPos, pos));
            buffer.append(replace);
            oldPos = pos + search.length();
        }
        if (oldPos < source.length()) {
            buffer.append(source.substring(oldPos));
        }
        return buffer.toString();
    }
}
