TEXT-58: Making the LookupTranslator constructor generalized

Project: http://git-wip-us.apache.org/repos/asf/commons-text/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-text/commit/bdfa8309
Tree: http://git-wip-us.apache.org/repos/asf/commons-text/tree/bdfa8309
Diff: http://git-wip-us.apache.org/repos/asf/commons-text/diff/bdfa8309

Branch: refs/heads/master
Commit: bdfa83094669b649504695032cd2aac57e500592
Parents: ddd93aa
Author: Rob Tompkins <chtom...@gmail.com>
Authored: Sat Jan 21 11:07:04 2017 -0500
Committer: Rob Tompkins <chtom...@gmail.com>
Committed: Sat Jan 21 11:07:04 2017 -0500

----------------------------------------------------------------------
 .../commons/text/translate/EntityArrays.java    | 50 +++++++++++---------
 .../text/translate/LookupTranslator.java        | 40 +++++++++++++++-
 .../text/translate/EntityArraysTest.java        |  7 +--
 .../text/translate/LookupTranslatorTest.java    | 12 +++--
 4 files changed, 79 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-text/blob/bdfa8309/src/main/java/org/apache/commons/text/translate/EntityArrays.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/text/translate/EntityArrays.java 
b/src/main/java/org/apache/commons/text/translate/EntityArrays.java
index 1165b1a..95dab76 100644
--- a/src/main/java/org/apache/commons/text/translate/EntityArrays.java
+++ b/src/main/java/org/apache/commons/text/translate/EntityArrays.java
@@ -136,12 +136,13 @@ public class EntityArrays {
     };
 
     /**
-     * Mapping to escape <a 
href="https://secure.wikimedia.org/wikipedia/en/wiki/ISO/IEC_8859-1">ISO-8859-1</a>
+     * A Map&lt;CharSequence, CharSequence&gt; to escape
+     * <a 
href="https://secure.wikimedia.org/wikipedia/en/wiki/ISO/IEC_8859-1">ISO-8859-1</a>
      * characters to their named HTML 3.x equivalents.
      */
-    public static final Map<String,String> ISO8859_1_ESCAPE;
+    public static final Map<CharSequence, CharSequence> ISO8859_1_ESCAPE;
     static {
-        Map<String,String> initialMap = new HashMap<>();
+        Map<CharSequence, CharSequence> initialMap = new HashMap<>();
         initialMap.put("\u00A0", "&nbsp;"); // non-breaking space
         initialMap.put("\u00A1", "&iexcl;"); // inverted exclamation mark
         initialMap.put("\u00A2", "&cent;"); // cent sign
@@ -252,7 +253,7 @@ public class EntityArrays {
     /**
      * Reverse of {@link #ISO8859_1_ESCAPE} for unescaping purposes.
      */
-    public static final Map<String,String> ISO8859_1_UNESCAPE;
+    public static final Map<CharSequence, CharSequence> ISO8859_1_UNESCAPE;
     static {
         ISO8859_1_UNESCAPE = 
Collections.unmodifiableMap(invert(ISO8859_1_ESCAPE));
     }
@@ -463,14 +464,14 @@ public class EntityArrays {
     };
 
     /**
-     * A Map&lt;String, String&gt; to escape additional
+     * A Map&lt;CharSequence, CharSequence&gt; to escape additional
      * <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">character 
entity
      * references</a>. Note that this must be used with {@link 
#ISO8859_1_ESCAPE} to get the full list of
      * HTML 4.0 character entities.
      */
-    public static final Map<String,String> HTML40_EXTENDED_ESCAPE;
+    public static final Map<CharSequence, CharSequence> HTML40_EXTENDED_ESCAPE;
     static {
-        Map<String,String> initialMap = new HashMap<>();
+        Map<CharSequence, CharSequence> initialMap = new HashMap<>();
         // <!-- Latin Extended-B -->
         initialMap.put("\u0192", "&fnof;"); // latin small f with hook = 
function= florin, U+0192 ISOtech -->
         // <!-- Greek -->
@@ -679,7 +680,7 @@ public class EntityArrays {
     /**
      * Reverse of {@link #HTML40_EXTENDED_ESCAPE} for unescaping purposes.
      */
-    public static final Map<String,String> HTML40_EXTENDED_UNESCAPE;
+    public static final Map<CharSequence, CharSequence> 
HTML40_EXTENDED_UNESCAPE;
     static {
         HTML40_EXTENDED_UNESCAPE = 
Collections.unmodifiableMap(invert(HTML40_EXTENDED_ESCAPE));
     }
@@ -700,13 +701,14 @@ public class EntityArrays {
     };
 
     /**
-     * A Map&lt;String, String&gt; to escape the basic XML and HTML character 
entities.
+     * A Map&lt;CharSequence, CharSequence&gt; to escape the basic XML and HTML
+     * character entities.
      *
      * Namely: {@code " & < >}
      */
-    public static final Map<String,String> BASIC_ESCAPE;
+    public static final Map<CharSequence, CharSequence> BASIC_ESCAPE;
     static {
-        Map<String,String> initialMap = new HashMap<>();
+        Map<CharSequence, CharSequence> initialMap = new HashMap<>();
         initialMap.put("\"", "&quot;"); // " - double-quote
         initialMap.put("&", "&amp;");   // & - ampersand
         initialMap.put("<", "&lt;");    // < - less-than
@@ -725,7 +727,7 @@ public class EntityArrays {
     /**
      * Reverse of {@link #BASIC_ESCAPE} for unescaping purposes.
      */
-    public static final Map<String,String> BASIC_UNESCAPE;
+    public static final Map<CharSequence, CharSequence> BASIC_UNESCAPE;
     static {
         BASIC_UNESCAPE = Collections.unmodifiableMap(invert(BASIC_ESCAPE));
     }
@@ -741,11 +743,12 @@ public class EntityArrays {
     };
 
     /**
-     * A Map&lt;String, String&gt; to escape the apostrophe character to its 
XML character entity.
+     * A Map&lt;CharSequence, CharSequence&gt; to escape the apostrophe 
character to
+     * its XML character entity.
      */
-    public static final Map<String,String> APOS_ESCAPE;
+    public static final Map<CharSequence, CharSequence> APOS_ESCAPE;
     static {
-        Map<String,String> initialMap = new HashMap<>();
+        Map<CharSequence, CharSequence> initialMap = new HashMap<>();
         initialMap.put("'","&apos;"); // XML apostrophe
         APOS_ESCAPE = Collections.unmodifiableMap(initialMap);
     }
@@ -761,7 +764,7 @@ public class EntityArrays {
     /**
      * Reverse of {@link #APOS_ESCAPE()} for unescaping purposes.
      */
-    public static final Map<String, String> APOS_UNESCAPE;
+    public static final Map<CharSequence, CharSequence> APOS_UNESCAPE;
     static {
         APOS_UNESCAPE = Collections.unmodifiableMap(invert(APOS_ESCAPE));
     }
@@ -783,13 +786,14 @@ public class EntityArrays {
     };
 
     /**
-     * A Map&lt;String, String&gt; to escape the Java control characters.
+     * A Map&lt;CharSequence, CharSequence&gt; to escape the Java
+     * control characters.
      *
      * Namely: {@code \b \n \t \f \r}
      */
-    public static final Map<String, String> JAVA_CTRL_CHARS_ESCAPE;
+    public static final Map<CharSequence, CharSequence> JAVA_CTRL_CHARS_ESCAPE;
     static {
-        Map<String,String> initialMap = new HashMap<>();
+        Map<CharSequence, CharSequence> initialMap = new HashMap<>();
         initialMap.put("\b", "\\b");
         initialMap.put("\n", "\\n");
         initialMap.put("\t", "\\t");
@@ -809,7 +813,7 @@ public class EntityArrays {
     /**
      * Reverse of {@link #JAVA_CTRL_CHARS_ESCAPE} for unescaping purposes.
      */
-    public static final Map<String, String> JAVA_CTRL_CHARS_UNESCAPE;
+    public static final Map<CharSequence, CharSequence> 
JAVA_CTRL_CHARS_UNESCAPE;
     static {
         JAVA_CTRL_CHARS_UNESCAPE = 
Collections.unmodifiableMap(invert(JAVA_CTRL_CHARS_ESCAPE));
     }
@@ -834,9 +838,9 @@ public class EntityArrays {
      * @param map Map&lt;String, String&gt; to be inverted
      * @return Map&lt;String, String&gt; inverted array
      */
-    public static Map<String,String> invert(final Map<String,String> map) {
-        Map<String,String> newMap = new HashMap<>();
-        for(String key: map.keySet()){
+    public static Map<CharSequence, CharSequence> invert(final 
Map<CharSequence, CharSequence> map) {
+        Map<CharSequence, CharSequence> newMap = new HashMap<>();
+        for(CharSequence key: map.keySet()){
             newMap.put(map.get(key), key);
         }
         return newMap;

http://git-wip-us.apache.org/repos/asf/commons-text/blob/bdfa8309/src/main/java/org/apache/commons/text/translate/LookupTranslator.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/commons/text/translate/LookupTranslator.java 
b/src/main/java/org/apache/commons/text/translate/LookupTranslator.java
index 8a79580..b381938 100644
--- a/src/main/java/org/apache/commons/text/translate/LookupTranslator.java
+++ b/src/main/java/org/apache/commons/text/translate/LookupTranslator.java
@@ -18,8 +18,10 @@ package org.apache.commons.text.translate;
 
 import java.io.IOException;
 import java.io.Writer;
+import java.security.InvalidParameterException;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
 
 /**
  * Translates a value using a lookup table.
@@ -28,7 +30,7 @@ import java.util.HashSet;
  */
 public class LookupTranslator extends CharSequenceTranslator {
 
-    private final HashMap<String, String> lookupMap;
+    private final Map<String, String> lookupMap;
     private final HashSet<Character> prefixSet;
     private final int shortest;
     private final int longest;
@@ -42,6 +44,7 @@ public class LookupTranslator extends CharSequenceTranslator {
      *
      * @param lookup CharSequence[][] table of size [*][2]
      */
+    @Deprecated
     public LookupTranslator(final CharSequence[]... lookup) {
         lookupMap = new HashMap<>();
         prefixSet = new HashSet<>();
@@ -65,6 +68,41 @@ public class LookupTranslator extends CharSequenceTranslator 
{
     }
 
     /**
+     * Define the lookup table to be used in translation
+     *
+     * Note that, as of Lang 3.1 (the origin of this code), the key to the 
lookup
+     * table is converted to a java.lang.String. This is because we need the 
key
+     * to support hashCode and equals(Object), allowing it to be the key for a
+     * HashMap. See LANG-882.
+     *
+     * @param lookupMap Map&lt;CharSequence, CharSequence&gt; table of 
translator
+     *                  mappings
+     */
+    public LookupTranslator(final Map<CharSequence, CharSequence> lookupMap) {
+        if (lookupMap == null) {
+            throw new InvalidParameterException("lookupMap cannot be null");
+        }
+        this.lookupMap = new HashMap<>();
+        prefixSet = new HashSet<>();
+        int _shortest = Integer.MAX_VALUE;
+        int _longest = 0;
+        for (final CharSequence key : lookupMap.keySet()) {
+            this.lookupMap.put(key.toString(),
+                    lookupMap.get(key).toString());
+            this.prefixSet.add(key.charAt(0));
+            final int sz = key.length();
+            if (sz < _shortest) {
+                _shortest = sz;
+            }
+            if (sz > _longest) {
+                _longest = sz;
+            }
+        }
+        shortest = _shortest;
+        longest = _longest;
+    }
+
+    /**
      * {@inheritDoc}
      */
     @Override

http://git-wip-us.apache.org/repos/asf/commons-text/blob/bdfa8309/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java 
b/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java
index 6b4a0ca..1f7c807 100644
--- a/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java
+++ b/src/test/java/org/apache/commons/text/translate/EntityArraysTest.java
@@ -113,9 +113,10 @@ public class EntityArraysTest  {
         testEscapeVsUnescapeMaps(EntityArrays.JAVA_CTRL_CHARS_ESCAPE, 
EntityArrays.JAVA_CTRL_CHARS_UNESCAPE);
     }
 
-    private void testEscapeVsUnescapeMaps(final Map<String,String> escapeMap, 
final Map<String,String> unescapeMap) {
-        for (final String escapeKey : escapeMap.keySet()) {
-            for (final String unescapeKey : unescapeMap.keySet()) {
+    private void testEscapeVsUnescapeMaps(final Map<CharSequence, 
CharSequence> escapeMap,
+                                          final Map<CharSequence, 
CharSequence> unescapeMap) {
+        for (final CharSequence escapeKey : escapeMap.keySet()) {
+            for (final CharSequence unescapeKey : unescapeMap.keySet()) {
                 if (escapeKey == unescapeMap.get(unescapeKey)) {
                     assertEquals(escapeMap.get(escapeKey), unescapeKey);
                 }

http://git-wip-us.apache.org/repos/asf/commons-text/blob/bdfa8309/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java 
b/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java
index e9559a5..ebf7af5 100644
--- a/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java
+++ b/src/test/java/org/apache/commons/text/translate/LookupTranslatorTest.java
@@ -21,17 +21,21 @@ import org.junit.Test;
 
 import java.io.IOException;
 import java.io.StringWriter;
+import java.util.HashMap;
+import java.util.Map;
 
 import static org.junit.Assert.assertEquals;
 
 /**
- * Unit tests for {@link org.apache.commons.text.translate.LookupTranslator}.
+ * Unit tests for {@link LookupTranslator}.
  */
 public class LookupTranslatorTest  {
 
     @Test
     public void testBasicLookup() throws IOException {
-        final org.apache.commons.text.translate.LookupTranslator lt = new 
org.apache.commons.text.translate.LookupTranslator(new CharSequence[][] { { 
"one", "two" } });
+        final Map<CharSequence, CharSequence> translatorMap = new HashMap<>();
+        translatorMap.put("one", "two");
+        final LookupTranslator lt = new LookupTranslator(translatorMap);
         final StringWriter out = new StringWriter();
         final int result = lt.translate("one", 0, out);
         assertEquals("Incorrect codepoint consumption", 3, result);
@@ -41,7 +45,9 @@ public class LookupTranslatorTest  {
     // Tests: https://issues.apache.org/jira/browse/LANG-882
     @Test
     public void testLang882() throws IOException {
-        final org.apache.commons.text.translate.LookupTranslator lt = new 
LookupTranslator(new CharSequence[][] { { new StringBuffer("one"), new 
StringBuffer("two") } });
+        final Map<CharSequence, CharSequence> translatorMap = new HashMap<>();
+        translatorMap.put(new StringBuffer("one"), new StringBuffer("two"));
+        final LookupTranslator lt = new LookupTranslator(translatorMap);
         final StringWriter out = new StringWriter();
         final int result = lt.translate(new StringBuffer("one"), 0, out);
         assertEquals("Incorrect codepoint consumption", 3, result);

Reply via email to