TEXT-40 - Escape HTML characters only once: revert as per the issue comments
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/e9273cd4 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/e9273cd4 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/e9273cd4 Branch: refs/heads/release Commit: e9273cd4bb3da622ed761c998a6fb6e731538e18 Parents: 40061c7 Author: Sebb <s...@apache.org> Authored: Wed Feb 22 16:14:46 2017 +0000 Committer: Sebb <s...@apache.org> Committed: Wed Feb 22 16:14:46 2017 +0000 ---------------------------------------------------------------------- src/changes/changes.xml | 9 +- .../apache/commons/text/StringEscapeUtils.java | 93 +---------- .../text/translate/SingleLookupTranslator.java | 153 ------------------- .../commons/text/StringEscapeUtilsTest.java | 50 ------ 4 files changed, 9 insertions(+), 296 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 94db412..155d6f8 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -45,7 +45,14 @@ The <action> type attribute can be add,update,fix,remove. 
</properties> <body> - <release version="TBA" date="TBA" description="TBA"> + <release version="TBA" date="TBA" description=" + + Incompatible changes + ==================== + Methods StringEscapeUtils#escapeHtml3Once and StringEscapeUtils#escapeHtml4Once + have been removed; see TEXT-40 + "> + <action issue="TEXT-40" type="remove" dev="sebb">Escape HTML characters only once: revert</action> <action issue="TEXT-65" type="fix" dev="chtompki">Fixing the 200 checkstyle errors present in 1.0-beta-1</action> <action issue="TEXT-63" type="fix" dev="sebb">Mutable fields should be private</action> </release> http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/main/java/org/apache/commons/text/StringEscapeUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/StringEscapeUtils.java b/src/main/java/org/apache/commons/text/StringEscapeUtils.java index f98f116..05d2348 100644 --- a/src/main/java/org/apache/commons/text/StringEscapeUtils.java +++ b/src/main/java/org/apache/commons/text/StringEscapeUtils.java @@ -26,7 +26,6 @@ import org.apache.commons.text.translate.LookupTranslator; import org.apache.commons.text.translate.NumericEntityEscaper; import org.apache.commons.text.translate.NumericEntityUnescaper; import org.apache.commons.text.translate.OctalUnescaper; -import org.apache.commons.text.translate.SingleLookupTranslator; import org.apache.commons.text.translate.UnicodeUnescaper; import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover; @@ -205,25 +204,6 @@ public class StringEscapeUtils { ); /** - * The improved translator object for escaping HTML version 3.0. - * The 'improved' part of this translator is that it checks if the html is already translated. - * This check prevents double, triple, or recursive translations. 
- * - * While {@link #escapeHtml3Once(String)} is the expected method of use, this - * object allows the HTML escaping functionality to be used - * as the foundation for a custom translator. - * - * Note that, multiple lookup tables should be passed to this translator - * instead of passing multiple instances of this translator to the - * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the - * lookup table passed to that instance while deciding whether a value is - * already translated or not. - */ - public static final CharSequenceTranslator ESCAPE_HTML3_ONCE = - new SingleLookupTranslator(EntityArrays.BASIC_ESCAPE, EntityArrays.ISO8859_1_ESCAPE); - - - /** * Translator object for escaping HTML version 4.0. * * While {@link #escapeHtml4(String)} is the expected method of use, this @@ -238,28 +218,6 @@ public class StringEscapeUtils { ); /** - * The improved translator object for escaping HTML version 4.0. - * The 'improved' part of this translator is that it checks if the html is already translated. - * This check prevents double, triple, or recursive translations. - * - * While {@link #escapeHtml4Once(String)} is the expected method of use, this - * object allows the HTML escaping functionality to be used - * as the foundation for a custom translator. - * - * Note that, multiple lookup tables should be passed to this translator - * instead of passing multiple instances of this translator to the - * AggregateTranslator. Because, a SingleLookupTranslator only checks the values of the - * lookup table passed to that instance while deciding whether a value is - * already translated or not. - */ - public static final CharSequenceTranslator ESCAPE_HTML4_ONCE = - new SingleLookupTranslator( - EntityArrays.BASIC_ESCAPE, - EntityArrays.ISO8859_1_ESCAPE, - EntityArrays.HTML40_EXTENDED_ESCAPE - ); - - /** * Translator object for escaping individual Comma Separated Values. 
* * While {@link #escapeCsv(String)} is the expected method of use, this @@ -702,43 +660,6 @@ public class StringEscapeUtils { } /** - * <p>Escapes the characters in a {@code String} using HTML entities. - * But escapes them only once. i.e. does not escape already escaped characters.</p> - * - * <p> - * For example: - * </p> - * <p><code>"bread" & "butter"</code></p> - * becomes: - * <p> - * <code>&quot;bread&quot; &amp; &quot;butter&quot;</code>. - * </p> - * - * <p> - * But: - * </p> - * <p><code>&quot;bread&quot; &amp; &quot;butter&quot;</code></p> - * remains unaffected. - * - * <p>Supports all known HTML 4.0 entities, including funky accents. - * Note that the commonly used apostrophe escape character (&apos;) - * is not a legal entity and so is not supported). </p> - * - * @param input the {@code String} to escape, may be null - * @return a new escaped {@code String}, {@code null} if null string input - * - * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> - * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> - * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> - * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> - * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> - */ - public static final String escapeHtml4Once(final String input) { - return ESCAPE_HTML4_ONCE.translate(input); - } - - - /** * <p>Escapes the characters in a {@code String} using HTML entities.</p> * <p>Supports only the HTML 3.0 entities. </p> * @@ -749,18 +670,6 @@ public class StringEscapeUtils { return ESCAPE_HTML3.translate(input); } - /** - * <p>Escapes the characters in a {@code String} using HTML entities. - * But escapes them only once. i.e. 
does not escape already escaped characters.</p> - * <p>Supports only the HTML 3.0 entities. </p> - * - * @param input the {@code String} to escape, may be null - * @return a new escaped {@code String}, {@code null} if null string input - */ - public static final String escapeHtml3Once(final String input) { - return ESCAPE_HTML3_ONCE.translate(input); - } - //----------------------------------------------------------------------- /** * <p>Unescapes a string containing entity escapes to a string @@ -768,7 +677,7 @@ public class StringEscapeUtils { * escapes. Supports HTML 4.0 entities.</p> * * <p>For example, the string {@code "<Français>"} - * will become {@code "<Français>"}</p> + * will become {@code "<Français>"}</p> * * <p>If an entity is unrecognized, it is left alone, and inserted * verbatim into the result string. e.g. {@code ">&zzzz;x"} will http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java b/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java deleted file mode 100644 index 8fafab8..0000000 --- a/src/main/java/org/apache/commons/text/translate/SingleLookupTranslator.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.commons.text.translate; - -import java.io.IOException; -import java.io.Writer; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Map; - -/** - * Translates a value using a lookup table. - * But doesn't translate if that value is already translated. - * - * @since 1.0 - */ -public class SingleLookupTranslator extends CharSequenceTranslator { - - /** The lookupMap to be used for translation. */ - private final Map<String, String> lookupMap; - /** The first character of each key in the lookupMap. */ - private final HashSet<Character> prefixSet; - /** The length of the shortest key in the lookupMap. */ - private final int shortest; - /** The length of the longest key in the lookupMap. */ - private final int longest; - /** The length of the shortest value in the lookupMap. */ - private final int shortestValue; - /** The length of the longest value in the lookupMap. */ - private final int longestValue; - - /** - * Define the look tables to be used in translation. - * <p> - * Note that, as of Lang 3.1, the key to the lookup table is converted to a - * java.lang.String. This is because we need the key to support hashCode and - * equals(Object), allowing it to be the key for a HashMap. See LANG-882. - * <p> - * Also note that, multiple lookup tables should be passed to this translator - * instead of passing multiple instances of this translator to the - * AggregateTranslator. 
Because, this translator only checks the values of the - * lookup table passed to this instance while deciding whether a value is - * already translated or not. - * - * @param inputMaps an array of Map<CharSequence, CharSequence>. - */ - public SingleLookupTranslator(Map<CharSequence, CharSequence>... inputMaps) { - Map<CharSequence, CharSequence> lookup = new HashMap<>(); - for (Map<CharSequence, CharSequence> input : inputMaps) { - Iterator<Map.Entry<CharSequence, CharSequence>> it = input.entrySet().iterator(); - while (it.hasNext()) { - Map.Entry<CharSequence, CharSequence> pair = it.next(); - lookup.put(pair.getKey(), pair.getValue()); - } - } - lookupMap = new HashMap<String, String>(); - prefixSet = new HashSet<Character>(); - int _shortest = Integer.MAX_VALUE; - int _longest = 0; - int _shortestValue = Integer.MAX_VALUE; - int _longestValue = 0; - if (lookup != null) { - Iterator<Map.Entry<CharSequence, CharSequence>> it = lookup.entrySet().iterator(); - while (it.hasNext()) { - Map.Entry<CharSequence, CharSequence> pair = it.next(); - this.lookupMap.put(pair.getKey().toString(), pair.getValue().toString()); - this.prefixSet.add(pair.getKey().charAt(0)); - final int sz = pair.getKey().length(); - if (sz < _shortest) { - _shortest = sz; - } - if (sz > _longest) { - _longest = sz; - } - final int sizeOfValue = lookup.get(pair.getKey()).length(); - if (sizeOfValue < _shortestValue) { - _shortestValue = sizeOfValue; - } - if (sizeOfValue > _longestValue) { - _longestValue = sizeOfValue; - } - } - } - shortest = _shortest; - longest = _longest; - shortestValue = _shortestValue; - longestValue = _longestValue; - } - - /** - * Translate a set of codepoints, represented by an int index into a CharSequence, - * into another set of codepoints. The number of codepoints consumed must be returned, - * and the only IOExceptions thrown must be from interacting with the Writer so that - * the top level API may reliably ignore StringWriter IOExceptions. 
- * - * @param input CharSequence that is being translated - * @param index int representing the current point of translation - * @param out Writer to translate the text to - * @return int count of codepoints consumed - * @throws IOException if and only if the Writer produces an IOException - */ - @Override - public int translate(CharSequence input, int index, Writer out) throws IOException { - // check if already translated - int maxValue = longestValue; - if (index + maxValue > input.length()) { - maxValue = input.length() - index; - } - // implement greedy algorithm to check all the possible 'value' matches - // for which we need to skip translation. - for (int i = maxValue; i >= shortestValue; i--) { - final CharSequence subSeq = input.subSequence(index, index + i); - // If the sub-string is already translated, return without translating. - if (lookupMap.containsValue(subSeq.toString())) { - return 0; - } - } - - // check if translation exists for the input at position index - if (prefixSet.contains(input.charAt(index))) { - int max = longest; - if (index + longest > input.length()) { - max = input.length() - index; - } - // implement greedy algorithm by trying maximum match first - for (int i = max; i >= shortest; i--) { - final CharSequence subSeq = input.subSequence(index, index + i); - final String result = lookupMap.get(subSeq.toString()); - - if (result != null) { - out.write(result); - return i; - } - } - } - return 0; - } -} http://git-wip-us.apache.org/repos/asf/commons-text/blob/e9273cd4/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java index f716763..ef9d8ab 100644 --- a/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java +++ b/src/test/java/org/apache/commons/text/StringEscapeUtilsTest.java @@ -241,56 +241,6 
@@ public class StringEscapeUtilsTest { } @Test - public void testEscapeHtml4Once() { - for (final String[] element : HTML_ESCAPES) { - final String message = element[0]; - final String expected = element[1]; - final String original = element[2]; - assertEquals(message, expected, StringEscapeUtils.escapeHtml4Once(original)); - assertEquals(message, expected, StringEscapeUtils.escapeHtml4Once(expected)); - final StringWriter sw = new StringWriter(); - try { - StringEscapeUtils.ESCAPE_HTML4_ONCE.translate(original, sw); - } catch (final IOException e) { - } - final String actual = original == null ? null : sw.toString(); - assertEquals(message, expected, actual); - final StringWriter sw2 = new StringWriter(); - try { - StringEscapeUtils.ESCAPE_HTML4_ONCE.translate(expected, sw2); - } catch (final IOException e) { - } - final String actual2 = original == null ? null : sw2.toString(); - assertEquals(message, expected, actual2); - } - } - - @Test - public void testEscapeHtml3Once() { - for (final String[] element : HTML_ESCAPES) { - final String message = element[0]; - final String expected = element[1]; - final String original = element[2]; - assertEquals(message, expected, StringEscapeUtils.escapeHtml3Once(original)); - assertEquals(message, expected, StringEscapeUtils.escapeHtml3Once(expected)); - final StringWriter sw = new StringWriter(); - try { - StringEscapeUtils.ESCAPE_HTML3_ONCE.translate(original, sw); - } catch (final IOException e) { - } - final String actual = original == null ? null : sw.toString(); - assertEquals(message, expected, actual); - final StringWriter sw2 = new StringWriter(); - try { - StringEscapeUtils.ESCAPE_HTML3_ONCE.translate(expected, sw2); - } catch (final IOException e) { - } - final String actual2 = original == null ? null : sw2.toString(); - assertEquals(message, expected, actual2); - } - } - - @Test public void testUnescapeHtml4() { for (final String[] element : HTML_ESCAPES) { final String message = element[0];