Author: ggregory
Date: Fri Aug 5 15:33:28 2011
New Revision: 1154269
URL: http://svn.apache.org/viewvc?rev=1154269&view=rev
Log:
[CODEC-125] Implement a Beider-Morse phonetic matching codec. Apply Matthew's
patch https://issues.apache.org/jira/secure/attachment/12489480/handleH.patch.
Also: reduce the speed test loop bound from 40 to 30 iterations.
Modified:
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
Modified:
commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java?rev=1154269&r1=1154268&r2=1154269&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java (original)
+++ commons/proper/codec/trunk/src/java/org/apache/commons/codec/language/bm/Rule.java Fri Aug 5 15:33:28 2011
@@ -143,13 +143,13 @@ public class Rule {
Languages ls = Languages.instance(s);
for (String l : ls.getLanguages()) {
try {
- rs.put(l, parseRules(createScanner(s, rt, l)));
+ rs.put(l, parseRules(createScanner(s, rt, l), createResourceName(s, rt, l)));
} catch (IllegalStateException e) {
throw new IllegalStateException("Problem processing "
+ createResourceName(s, rt, l), e);
}
}
if (!rt.equals(RuleType.RULES)) {
- rs.put("common", parseRules(createScanner(s, rt, "common")));
+ rs.put("common", parseRules(createScanner(s, rt, "common"), createResourceName(s, rt, "common")));
}
rts.put(rt, Collections.unmodifiableMap(rs));
@@ -262,7 +262,7 @@ public class Rule {
}
}
- private static List<Rule> parseRules(Scanner scanner) {
+ private static List<Rule> parseRules(final Scanner scanner, final String location) {
List<Rule> lines = new ArrayList<Rule>();
int currentLine = 0;
@@ -300,7 +300,7 @@ public class Rule {
if (incl.contains(" ")) {
System.err.println("Warining: malformed import statement: " + rawLine);
} else {
- lines.addAll(parseRules(createScanner(incl)));
+ lines.addAll(parseRules(createScanner(incl), location + "->" + incl));
}
} else {
// rule
@@ -313,7 +313,21 @@ public class Rule {
String lCon = stripQuotes(parts[1]);
String rCon = stripQuotes(parts[2]);
PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
- Rule r = new Rule(pat, lCon, rCon, ph);
+ final int cLine = currentLine;
+ Rule r = new Rule(pat, lCon, rCon, ph) {
+ private final int line = cLine;
+ private final String loc = location;
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder();
+ sb.append("Rule");
+ sb.append("{line=").append(line);
+ sb.append(", loc='").append(loc).append('\'');
+ sb.append('}');
+ return sb.toString();
+ }
+ };
lines.add(r);
} catch (IllegalArgumentException e) {
throw new IllegalStateException("Problem parsing line " + currentLine, e);
Modified:
commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt?rev=1154269&r1=1154268&r2=1154269&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt (original)
+++ commons/proper/codec/trunk/src/resources/org/apache/commons/codec/language/bm/gen_rules_any.txt Fri Aug 5 15:33:28 2011
@@ -123,8 +123,8 @@
"gh" "" "[ei]" "(g[romanian+italian+greeklatin]|gh)"
"ouh" "" "[aioe]" "(v[french]|uh)"
-"uh" "" "[aioe]" "(v|uh)"
-"h" "" "$" ""
+"uh" "" "[aioe]" "(v|uh)"
+"h" "." "$" "" // match h at the end of words, but not as a single letter
"h" "[aeiouyäöü]" "" "" // german
"h" "^" "" "(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])"
Modified:
commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java
URL: http://svn.apache.org/viewvc/commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java?rev=1154269&r1=1154268&r2=1154269&view=diff
==============================================================================
--- commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java (original)
+++ commons/proper/codec/trunk/src/test/org/apache/commons/codec/language/bm/BeiderMorseEncoderTest.java Fri Aug 5 15:33:28 2011
@@ -25,7 +25,6 @@ import org.apache.commons.codec.EncoderE
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.StringEncoderAbstractTest;
import org.junit.Assert;
-import org.junit.Ignore;
import org.junit.Test;
/**
@@ -44,7 +43,7 @@ public class BeiderMorseEncoderTest exte
return new BeiderMorseEncoder();
}
- @Ignore
+ // @Ignore
@Test
public void testAsciiEncodeNotEmpty1Letter() throws EncoderException {
BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
@@ -113,7 +112,6 @@ public class BeiderMorseEncoderTest exte
Languages.instance("thereIsNoSuchLanguage");
}
- // @Ignore
@Test(timeout = 10000L)
public void testLongestEnglishSurname() throws EncoderException {
BeiderMorseEncoder bmpm = new BeiderMorseEncoder();
@@ -165,7 +163,7 @@ public class BeiderMorseEncoderTest exte
Random rand = new Random();
stringBuffer.append(chars[rand.nextInt(chars.length)]);
long start;
- for (int i = 0; i < 40; i++) {
+ for (int i = 0; i < 30; i++) {
start = System.currentTimeMillis();
// System.out.println(i + " String to encode:" + stringBuffer.toString());
bmpm.encode(stringBuffer.toString());