Reviewers: MikeSamuel,
Description:
If the table construction inserted "a/b/c" and then "b/c", it would fall
into an infinite loop, rather than accepting that "b/c" cannot be
improved on.
Also add comments to the algorithm.
Please review this at https://codereview.appspot.com/286250043/
Affected files (+55, -14 lines):
M src/com/google/caja/util/Abbreviator.java
M tests/com/google/caja/util/AbbreviatorTest.java
Index: src/com/google/caja/util/Abbreviator.java
diff --git a/src/com/google/caja/util/Abbreviator.java
b/src/com/google/caja/util/Abbreviator.java
index
2366242e559d8225fa56e298fc32c68a1607fea2..109242ca4820506ab9c264913a236767004c1cfc
100644
--- a/src/com/google/caja/util/Abbreviator.java
+++ b/src/com/google/caja/util/Abbreviator.java
@@ -52,9 +52,9 @@ public final class Abbreviator {
*/
public Abbreviator(Set<String> items, String sep) {
this.sep = sep;
- Map<String, String> abbrevToUri = new HashMap<String, String>();
- for (String item : items) { insert(abbrevToUri, item, ""); }
- for (Map.Entry<String, String> e : abbrevToUri.entrySet()) {
+ Map<String, String> abbrevToItem = new HashMap<String, String>();
+ for (String item : items) { insert(abbrevToItem, item, ""); }
+ for (Map.Entry<String, String> e : abbrevToItem.entrySet()) {
if (e.getValue() != null) {
itemToAbbrev.put(e.getValue(), e.getKey());
}
@@ -71,19 +71,45 @@ public final class Abbreviator {
return abbrev != null ? abbrev : item;
}
+ /**
+ * Insert an entry in a map of abbreviations to items.
+ *
+ * @param abbrevToItem The table. Null-valued entries mark ambiguous
+ * abbreviations.
+ * @param item The unabbreviated item to insert.
+ * @param abbrev The shortest abbreviation so far known to be
insufficiently
+ * specific; the inserted abbreviation will always be longer than
this
+ * if possible.
+ */
private void insert(
- Map<String, String> abbrevToUri, String item, String abbrev) {
+ Map<String, String> abbrevToItem, String item, String abbrev) {
+ // Find the next longer abbreviation to attempt.
abbrev = expand(item, abbrev);
- if (!abbrevToUri.containsKey(abbrev)) {
- abbrevToUri.put(abbrev, item);
+ if (!abbrevToItem.containsKey(abbrev)) {
+ // It is unambiguous; just insert it.
+ abbrevToItem.put(abbrev, item);
} else {
- String other = abbrevToUri.get(abbrev);
- if (!item.equals(other)) {
+ // It conflicts with an existing (longer or equal) abbreviation.
+ String other = abbrevToItem.get(abbrev);
+ if (!item.equals(other)) { // Skip if exact item already present.
if (!abbrev.equals(other)) {
- abbrevToUri.put(abbrev, null);
- if (other != null) { insert(abbrevToUri, other, abbrev); }
+ // The other item can be expressed longer.
+ // (If this condition is false, then the other item is a suffix
of
+ // this one, and so the other item is left as-is.)
+
+ // Mark this abbreviation as ambiguous.
+ abbrevToItem.put(abbrev, null);
+ // Re-insert the other item with its longer abbreviation.
+ if (other != null) { insert(abbrevToItem, other, abbrev); }
+ }
+ if (other == null && item.equals(abbrev)) {
+ // Item is a suffix of an existing item. Insert it and do not
attempt
+ // to find a longer abbreviation (which would infinitely
recurse).
+ abbrevToItem.put(abbrev, item);
+ } else {
+ // Try again to insert this item, with a longer abbreviation.
+ insert(abbrevToItem, item, abbrev);
}
- insert(abbrevToUri, item, abbrev);
}
}
}
Index: tests/com/google/caja/util/AbbreviatorTest.java
diff --git a/tests/com/google/caja/util/AbbreviatorTest.java
b/tests/com/google/caja/util/AbbreviatorTest.java
index
e727aacbb9723ddc2d16dcbf1651752102a6166c..71dcc57efa9949354ddb20d10067ea9d5fc7f30d
100644
--- a/tests/com/google/caja/util/AbbreviatorTest.java
+++ b/tests/com/google/caja/util/AbbreviatorTest.java
@@ -17,6 +17,7 @@ package com.google.caja.util;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
+import java.util.LinkedHashSet;
import junit.framework.TestCase;
@@ -30,7 +31,7 @@ public class AbbreviatorTest extends TestCase {
public final void testAbbreviator() {
Abbreviator a = new Abbreviator(
- new HashSet<String>(Arrays.asList(
+ new LinkedHashSet<String>(Arrays.asList(
"/a/b/c", "/a/d/e", "/a/d/f", "/a/g/h/c",
"/h/i", "/h/j/", "/", "/c", "/d", "x", "")),
"/");
@@ -48,9 +49,10 @@ public class AbbreviatorTest extends TestCase {
assertEquals("/notpresent",
a.unambiguousAbbreviationFor("/notpresent"));
}
- public final void testSetContainsSuffixOfOtherMember() {
+ public final void testSetContainsSuffixOfOtherMember_Ordering1() {
+ // Using order-preserving LinkedHashSet to test specific insertion
ordering.
Abbreviator a = new Abbreviator(
- new HashSet<String>(Arrays.asList(
+ new LinkedHashSet<String>(Arrays.asList(
"foo:bar/z", "foo:foo:baz/z", "foo:baz/z", "foo:bar:far/z")),
":");
assertEquals("bar/z", a.unambiguousAbbreviationFor("foo:bar/z"));
@@ -59,4 +61,17 @@ public class AbbreviatorTest extends TestCase {
"foo:foo:baz/z", a.unambiguousAbbreviationFor("foo:foo:baz/z"));
assertEquals("far/z", a.unambiguousAbbreviationFor("foo:bar:far/z"));
}
+
+ public final void testSetContainsSuffixOfOtherMember_Ordering2() {
+ // Using order-preserving LinkedHashSet to test specific insertion
ordering.
+ Abbreviator a = new Abbreviator(
+ new LinkedHashSet<String>(Arrays.asList(
+ "foo:bar/z", "foo:baz/z", "foo:foo:baz/z", "foo:bar:far/z")),
+ ":");
+ assertEquals("bar/z", a.unambiguousAbbreviationFor("foo:bar/z"));
+ assertEquals("foo:baz/z", a.unambiguousAbbreviationFor("foo:baz/z"));
+ assertEquals(
+ "foo:foo:baz/z", a.unambiguousAbbreviationFor("foo:foo:baz/z"));
+ assertEquals("far/z", a.unambiguousAbbreviationFor("foo:bar:far/z"));
+ }
}
--
---
You received this message because you are subscribed to the Google Groups "Google Caja Discuss" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.