TEXT-58: Making the LookupTranslator constructor generalized
Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/bdfa8309 Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/bdfa8309 Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/bdfa8309 Branch: refs/heads/master Commit: bdfa83094669b649504695032cd2aac57e500592 Parents: ddd93aa Author: Rob Tompkins <chtom...@gmail.com> Authored: Sat Jan 21 11:07:04 2017 -0500 Committer: Rob Tompkins <chtom...@gmail.com> Committed: Sat Jan 21 11:07:04 2017 -0500 ---------------------------------------------------------------------- .../commons/text/translate/EntityArrays.java | 50 +++++++++++--------- .../text/translate/LookupTranslator.java | 40 +++++++++++++++- .../text/translate/EntityArraysTest.java | 7 +-- .../text/translate/LookupTranslatorTest.java | 12 +++-- 4 files changed, 79 insertions(+), 30 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-text/blob/bdfa8309/src/main/java/org/apache/commons/text/translate/EntityArrays.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/text/translate/EntityArrays.java b/src/main/java/org/apache/commons/text/translate/EntityArrays.java index 1165b1a..95dab76 100644 --- a/src/main/java/org/apache/commons/text/translate/EntityArrays.java +++ b/src/main/java/org/apache/commons/text/translate/EntityArrays.java @@ -136,12 +136,13 @@ public class EntityArrays { }; /** - * Mapping to escape <a href="https://secure.wikimedia.org/wikipedia/en/wiki/ISO/IEC_8859-1">ISO-8859-1</a> + * A Map<CharSequence, CharSequence> to to escape + * <a href="https://secure.wikimedia.org/wikipedia/en/wiki/ISO/IEC_8859-1">ISO-8859-1</a> * characters to their named HTML 3.x equivalents. 
*/ - public static final Map<String,String> ISO8859_1_ESCAPE; + public static final Map<CharSequence, CharSequence> ISO8859_1_ESCAPE; static { - Map<String,String> initialMap = new HashMap<>(); + Map<CharSequence, CharSequence> initialMap = new HashMap<>(); initialMap.put("\u00A0", "&nbsp;"); // non-breaking space initialMap.put("\u00A1", "&iexcl;"); // inverted exclamation mark initialMap.put("\u00A2", "&cent;"); // cent sign @@ -252,7 +253,7 @@ public class EntityArrays { /** * Reverse of {@link #ISO8859_1_ESCAPE} for unescaping purposes. */ - public static final Map<String,String> ISO8859_1_UNESCAPE; + public static final Map<CharSequence, CharSequence> ISO8859_1_UNESCAPE; static { ISO8859_1_UNESCAPE = Collections.unmodifiableMap(invert(ISO8859_1_ESCAPE)); } @@ -463,14 +464,14 @@ public class EntityArrays { }; /** - * A Map<String, String> to escape additional + * A Map<CharSequence, CharSequence> to escape additional * <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">character entity * references</a>. Note that this must be used with {@link #ISO8859_1_ESCAPE} to get the full list of * HTML 4.0 character entities. */ - public static final Map<String,String> HTML40_EXTENDED_ESCAPE; + public static final Map<CharSequence, CharSequence> HTML40_EXTENDED_ESCAPE; static { - Map<String,String> initialMap = new HashMap<>(); + Map<CharSequence, CharSequence> initialMap = new HashMap<>(); // <!-- Latin Extended-B --> initialMap.put("\u0192", "&fnof;"); // latin small f with hook = function= florin, U+0192 ISOtech --> // <!-- Greek --> @@ -679,7 +680,7 @@ public class EntityArrays { /** * Reverse of {@link #HTML40_EXTENDED_ESCAPE} for unescaping purposes.
*/ - public static final Map<String,String> HTML40_EXTENDED_UNESCAPE; + public static final Map<CharSequence, CharSequence> HTML40_EXTENDED_UNESCAPE; static { HTML40_EXTENDED_UNESCAPE = Collections.unmodifiableMap(invert(HTML40_EXTENDED_ESCAPE)); } @@ -700,13 +701,14 @@ public class EntityArrays { }; /** - * A Map<String, String> to escape the basic XML and HTML character entities. + * A Map<CharSequence, CharSequence> to escape the basic XML and HTML + * character entities. * * Namely: {@code " & < >} */ - public static final Map<String,String> BASIC_ESCAPE; + public static final Map<CharSequence, CharSequence> BASIC_ESCAPE; static { - Map<String,String> initialMap = new HashMap<>(); + Map<CharSequence, CharSequence> initialMap = new HashMap<>(); initialMap.put("\"", "&quot;"); // " - double-quote initialMap.put("&", "&amp;"); // & - ampersand initialMap.put("<", "&lt;"); // < - less-than @@ -725,7 +727,7 @@ public class EntityArrays { /** * Reverse of {@link #BASIC_ESCAPE} for unescaping purposes. */ - public static final Map<String,String> BASIC_UNESCAPE; + public static final Map<CharSequence, CharSequence> BASIC_UNESCAPE; static { BASIC_UNESCAPE = Collections.unmodifiableMap(invert(BASIC_ESCAPE)); } @@ -741,11 +743,12 @@ public class EntityArrays { }; /** - * A Map<String, String> to escape the apostrophe character to its XML character entity. + * A Map<CharSequence, CharSequence> to escape the apostrophe character to + * its XML character entity. */ - public static final Map<String,String> APOS_ESCAPE; + public static final Map<CharSequence, CharSequence> APOS_ESCAPE; static { - Map<String,String> initialMap = new HashMap<>(); + Map<CharSequence, CharSequence> initialMap = new HashMap<>(); initialMap.put("'","&apos;"); // XML apostrophe APOS_ESCAPE = Collections.unmodifiableMap(initialMap); } @@ -761,7 +764,7 @@ public class EntityArrays { /** * Reverse of {@link #APOS_ESCAPE()} for unescaping purposes.
*/ - public static final Map<String, String> APOS_UNESCAPE; + public static final Map<CharSequence, CharSequence> APOS_UNESCAPE; static { APOS_UNESCAPE = Collections.unmodifiableMap(invert(APOS_ESCAPE)); } @@ -783,13 +786,14 @@ public class EntityArrays { }; /** - * A Map<String, String> to escape the Java control characters. + * A Map<CharSequence, CharSequence> to escape the Java + * control characters. * * Namely: {@code \b \n \t \f \r} */ - public static final Map<String, String> JAVA_CTRL_CHARS_ESCAPE; + public static final Map<CharSequence, CharSequence> JAVA_CTRL_CHARS_ESCAPE; static { - Map<String,String> initialMap = new HashMap<>(); + Map<CharSequence, CharSequence> initialMap = new HashMap<>(); initialMap.put("\b", "\\b"); initialMap.put("\n", "\\n"); initialMap.put("\t", "\\t"); @@ -809,7 +813,7 @@ public class EntityArrays { /** * Reverse of {@link #JAVA_CTRL_CHARS_ESCAPE} for unescaping purposes. */ - public static final Map<String, String> JAVA_CTRL_CHARS_UNESCAPE; + public static final Map<CharSequence, CharSequence> JAVA_CTRL_CHARS_UNESCAPE; static { JAVA_CTRL_CHARS_UNESCAPE = Collections.unmodifiableMap(invert(JAVA_CTRL_CHARS_ESCAPE)); } @@ -834,9 +838,9 @@ public class EntityArrays { * @param map Map<String, String> to be inverted * @return Map<String, String> inverted array */ - public static Map<String,String> invert(final Map<String,String> map) { - Map<String,String> newMap = new HashMap<>(); - for(String key: map.keySet()){ + public static Map<CharSequence, CharSequence> invert(final Map<CharSequence, CharSequence> map) { + Map<CharSequence, CharSequence> newMap = new HashMap<>(); + for(CharSequence key: map.keySet()){ newMap.put(map.get(key), key); } return newMap; http://git-wip-us.apache.org/repos/asf/commons-text/blob/bdfa8309/src/main/java/org/apache/commons/text/translate/LookupTranslator.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/commons/text/translate/LookupTranslator.java b/src/main/java/org/apache/commons/text/translate/LookupTranslator.java index 8a79580..b381938 100644 --- a/src/main/java/org/apache/commons/text/translate/LookupTranslator.java +++ b/src/main/java/org/apache/commons/text/translate/LookupTranslator.java @@ -18,8 +18,10 @@ package org.apache.commons.text.translate; import java.io.IOException; import java.io.Writer; +import java.security.InvalidParameterException; import java.util.HashMap; import java.util.HashSet; +import java.util.Map; /** * Translates a value using a lookup table. @@ -28,7 +30,7 @@ import java.util.HashSet; */ public class LookupTranslator extends CharSequenceTranslator { - private final HashMap<String, String> lookupMap; + private final Map<String, String> lookupMap; private final HashSet<Character> prefixSet; private final int shortest; private final int longest; @@ -42,6 +44,7 @@ public class LookupTranslator extends CharSequenceTranslator { * * @param lookup CharSequence[][] table of size [*][2] */ + @Deprecated public LookupTranslator(final CharSequence[]... lookup) { lookupMap = new HashMap<>(); prefixSet = new HashSet<>(); @@ -65,6 +68,41 @@ public class LookupTranslator extends CharSequenceTranslator { } /** + * Define the lookup table to be used in translation + * + * Note that, as of Lang 3.1 (the orgin of this code), the key to the lookup + * table is converted to a java.lang.String. This is because we need the key + * to support hashCode and equals(Object), allowing it to be the key for a + * HashMap. See LANG-882. 
+ * + * @param lookupMap Map<CharSequence, CharSequence> table of translator + * mappings + */ + public LookupTranslator(final Map<CharSequence, CharSequence> lookupMap) { + if (lookupMap == null) { + throw new InvalidParameterException("lookupMap cannot be null"); + } + this.lookupMap = new HashMap<>(); + prefixSet = new HashSet<>(); + int _shortest = Integer.MAX_VALUE; + int _longest = 0; + for (final CharSequence key : lookupMap.keySet()) { + this.lookupMap.put(key.toString(), + lookupMap.get(key).toString()); + this.prefixSet.add(key.charAt(0)); + final int sz = key.length(); + if (sz < _shortest) { + _shortest = sz; + } + if (sz > _longest) { + _longest = sz; + } + } + shortest = _shortest; + longest = _longest; + } + + /** * {@inheritDoc} */ @Override http://git-wip-us.apache.org/repos/asf/commons-text/blob/bdfa8309/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java b/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java index 6b4a0ca..1f7c807 100644 --- a/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java +++ b/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java @@ -113,9 +113,10 @@ public class EntityArraysTest { testEscapeVsUnescapeMaps(EntityArrays.JAVA_CTRL_CHARS_ESCAPE, EntityArrays.JAVA_CTRL_CHARS_UNESCAPE); } - private void testEscapeVsUnescapeMaps(final Map<String,String> escapeMap, final Map<String,String> unescapeMap) { - for (final String escapeKey : escapeMap.keySet()) { - for (final String unescapeKey : unescapeMap.keySet()) { + private void testEscapeVsUnescapeMaps(final Map<CharSequence, CharSequence> escapeMap, + final Map<CharSequence, CharSequence> unescapeMap) { + for (final CharSequence escapeKey : escapeMap.keySet()) { + for (final CharSequence unescapeKey : unescapeMap.keySet()) { if (escapeKey == 
unescapeMap.get(unescapeKey)) { assertEquals(escapeMap.get(escapeKey), unescapeKey); } http://git-wip-us.apache.org/repos/asf/commons-text/blob/bdfa8309/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java b/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java index e9559a5..ebf7af5 100644 --- a/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java +++ b/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java @@ -21,17 +21,21 @@ import org.junit.Test; import java.io.IOException; import java.io.StringWriter; +import java.util.HashMap; +import java.util.Map; import static org.junit.Assert.assertEquals; /** - * Unit tests for {@link org.apache.commons.text.translate.LookupTranslator}. + * Unit tests for {@link LookupTranslator}. */ public class LookupTranslatorTest { @Test public void testBasicLookup() throws IOException { - final org.apache.commons.text.translate.LookupTranslator lt = new org.apache.commons.text.translate.LookupTranslator(new CharSequence[][] { { "one", "two" } }); + final Map<CharSequence, CharSequence> translatorMap = new HashMap<>(); + translatorMap.put("one", "two"); + final LookupTranslator lt = new LookupTranslator(translatorMap); final StringWriter out = new StringWriter(); final int result = lt.translate("one", 0, out); assertEquals("Incorrect codepoint consumption", 3, result); @@ -41,7 +45,9 @@ public class LookupTranslatorTest { // Tests: https://issues.apache.org/jira/browse/LANG-882 @Test public void testLang882() throws IOException { - final org.apache.commons.text.translate.LookupTranslator lt = new LookupTranslator(new CharSequence[][] { { new StringBuffer("one"), new StringBuffer("two") } }); + final Map<CharSequence, CharSequence> translatorMap = new HashMap<>(); + translatorMap.put(new 
StringBuffer("one"), new StringBuffer("two")); + final LookupTranslator lt = new LookupTranslator(translatorMap); final StringWriter out = new StringWriter(); final int result = lt.translate(new StringBuffer("one"), 0, out); assertEquals("Incorrect codepoint consumption", 3, result);