Revision: 6230
http://languagetool.svn.sourceforge.net/languagetool/?rev=6230&view=rev
Author: archeus
Date: 2012-01-12 12:31:36 +0000 (Thu, 12 Jan 2012)
Log Message:
-----------
added a new case conversion for Match tag: preserve case
Modified Paths:
--------------
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
trunk/JLanguageTool/src/rules/pattern.xsd
Added Paths:
-----------
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java 2012-01-12 12:28:59 UTC (rev 6229)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java 2012-01-12 12:31:36 UTC (rev 6230)
@@ -41,7 +41,7 @@
/** Possible string case conversions. **/
public enum CaseConversion {
- NONE, STARTLOWER, STARTUPPER, ALLLOWER, ALLUPPER;
+ NONE, STARTLOWER, STARTUPPER, ALLLOWER, ALLUPPER, PRESERVE;
/**
* Converts string to the constant enum.
@@ -429,11 +429,9 @@
}
String token = s;
switch (caseConversionType) {
- case NONE: // preserve case
- /*
-
- temporary commented out until we agree if this is correct
-
+ case NONE:
+ break;
+ case PRESERVE:
if (StringTools.startsWithUppercase(sample)) {
if (StringTools.isAllUppercase(formattedToken.getToken())) {
token = token.toUpperCase();
@@ -441,7 +439,6 @@
token = StringTools.uppercaseFirstChar(token);
}
}
- */
break;
case STARTLOWER:
token = token.substring(0, 1).toLowerCase() + token.substring(1);
Modified: trunk/JLanguageTool/src/rules/pattern.xsd
===================================================================
--- trunk/JLanguageTool/src/rules/pattern.xsd 2012-01-12 12:28:59 UTC (rev 6229)
+++ trunk/JLanguageTool/src/rules/pattern.xsd 2012-01-12 12:31:36 UTC (rev 6230)
@@ -119,6 +119,7 @@
<xs:enumeration
value="startupper" />
<xs:enumeration
value="allupper" />
<xs:enumeration
value="alllower" />
+ <xs:enumeration
value="preserve" />
</xs:restriction>
</xs:simpleType>
</xs:attribute>
Added:
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java (rev 0)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java 2012-01-12 12:31:36 UTC (rev 6230)
@@ -0,0 +1,176 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.patterns;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import junit.framework.TestCase;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+
+import org.languagetool.AnalyzedToken;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.rules.patterns.Match.CaseConversion;
+import org.languagetool.rules.patterns.Match.IncludeRange;
+import org.languagetool.synthesis.BaseSynthesizer;
+import org.languagetool.synthesis.ManualSynthesizer;
+import org.languagetool.synthesis.Synthesizer;
+
+/**
+ * Test class for {@link Match}.
+ * @author Ionuț Păduraru
+ */
+public class MatchTest extends TestCase {
+
+ /**
+ * Adapter from {@link ManualSynthesizer} to {@link Synthesizer}.
<br/>
+ * Note: This could be extracted as a standalone class.
+ */
+ public static class ManualSynthesizerAdapter extends BaseSynthesizer
implements Synthesizer {
+ private ManualSynthesizer manualSynthesizer;
+ public ManualSynthesizerAdapter(ManualSynthesizer
manualSynthesizer) {
+ super(null, null); // no file
+ this.manualSynthesizer = manualSynthesizer;
+ }
+ @Override
+ protected void initSynthesizer() throws IOException {
+ synthesizer = new IStemmer() { // null synthesiser
+ @Override
+ public List<WordData> lookup(CharSequence word)
{
+ return new ArrayList<WordData>();
+ }
+ };
+ }
+ @Override
+ protected void initPossibleTags() throws IOException {
+ if (possibleTags == null) {
+ possibleTags = new
ArrayList<String>(manualSynthesizer.getPossibleTags());
+ }
+ }
+ @Override
+ protected void lookup(String lemma, String posTag, List<String>
results) {
+ super.lookup(lemma, posTag, results);
+ // add words that are missing from the
romanian_synth.dict file
+ final List<String> manualForms =
manualSynthesizer.lookup(lemma.toLowerCase(), posTag);
+ if (manualForms != null) {
+ results.addAll(manualForms);
+ }
+ }
+ }
+
+ //-- helper methods
+
+ private Synthesizer getTestSynthesizer() throws
UnsupportedEncodingException, IOException {
+ final String data =
+ "# some test data\n" +
+ "inflectedform11\tlemma1\tPOS1\n" +
+ "inflectedform121\tlemma1\tPOS2\n" +
+ "inflectedform122\tlemma1\tPOS2\n" +
+ "inflectedform2\tlemma2\tPOS1\n"
+ ;
+ return new ManualSynthesizerAdapter(new ManualSynthesizer(new
ByteArrayInputStream(data.getBytes("UTF-8"))));
+ }
+
+ private AnalyzedTokenReadings getAnalyzedTokenReadings(String token,
String posTag, String lemma) {
+ return new AnalyzedTokenReadings(new AnalyzedToken(token,
posTag, lemma), 0);
+ }
+
+ private Match getMatch(String posTag, String posTagReplace,
CaseConversion caseConversion) throws UnsupportedEncodingException, IOException
{
+ Match match = new Match(posTag, posTagReplace, true, null,
null, caseConversion, false, IncludeRange.NONE);
+ match.setSynthesizer(getTestSynthesizer());
+ return match;
+ }
+
+ //-- test methods
+
+ //-- CASE CONVERSION
+
+ public void testStartUpper() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.STARTUPPER);
+ match.setToken(getAnalyzedTokenReadings("inflectedform11",
"POS1", "Lemma1"));
+ assertEquals("[Inflectedform121, Inflectedform122]",
Arrays.toString( match.toFinalString()));
+ }
+
+ public void testStartLower() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.STARTLOWER);
+ match.setToken(getAnalyzedTokenReadings("InflectedForm11",
"POS1", "Lemma1"));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ }
+
+ public void testAllUpper() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.ALLUPPER);
+ match.setToken(getAnalyzedTokenReadings("InflectedForm11",
"POS1", "Lemma1"));
+ assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]",
Arrays.toString(match.toFinalString()));
+ }
+
+ public void testAllLower() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.ALLLOWER);
+ match.setToken(getAnalyzedTokenReadings("InflectedForm11",
"POS1", "Lemma1"));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ }
+
+ public void testPreserveStartUpper() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.PRESERVE);
+ match.setToken(getAnalyzedTokenReadings("InflectedForm11",
"POS1", "Lemma1"));
+ assertEquals("[Inflectedform121, Inflectedform122]",
Arrays.toString(match.toFinalString()));
+ }
+
+ public void testPreserveStartLower() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.PRESERVE);
+ match.setToken(getAnalyzedTokenReadings("inflectedForm11",
"POS1", "Lemma1"));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ }
+
+ public void testPreserveAllUpper() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.PRESERVE);
+ match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11",
"POS1", "Lemma1"));
+ assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]",
Arrays.toString( match.toFinalString()));
+ }
+
+ public void testPreserveMixed() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.PRESERVE);
+ match.setToken(getAnalyzedTokenReadings("inflecTedForm11",
"POS1", "Lemma1"));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+
+ }
+
+ public void testPreserveNoneUpper() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.NONE);
+ match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11",
"POS1", "Lemma1"));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ }
+
+ public void testPreserveNoneLower() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.NONE);
+ match.setToken(getAnalyzedTokenReadings("inflectedform11",
"POS1", "Lemma1"));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ }
+
+ public void testPreserveNoneMixed() throws Exception {
+ Match match = getMatch("POS1", "POS2",
Match.CaseConversion.NONE);
+ match.setToken(getAnalyzedTokenReadings("inFLectedFOrm11",
"POS1", "Lemma1"));
+ assertEquals("[inflectedform121, inflectedform122]",
Arrays.toString(match.toFinalString()));
+ }
+
+}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
RSA(R) Conference 2012
Feb 27 - Mar 2
Save $400 by Jan. 27
Register now!
http://p.sf.net/sfu/rsa-sfdev2dev2
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs