[LanguageTool] SF.net SVN: languagetool:[6230] trunk/JLanguageTool/src

archeus Thu, 12 Jan 2012 04:31:53 -0800

Revision: 6230
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=6230&view=rev
Author:   archeus
Date:     2012-01-12 12:31:36 +0000 (Thu, 12 Jan 2012)
Log Message:
-----------
added a new case conversion for Match tag: preserve case


Modified Paths:
--------------
    trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
    trunk/JLanguageTool/src/rules/pattern.xsd

Added Paths:
-----------
    trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java     
2012-01-12 12:28:59 UTC (rev 6229)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Match.java     
2012-01-12 12:31:36 UTC (rev 6230)
@@ -41,7 +41,7 @@
 
   /** Possible string case conversions. **/
   public enum CaseConversion {
-    NONE, STARTLOWER, STARTUPPER, ALLLOWER, ALLUPPER;
+    NONE, STARTLOWER, STARTUPPER, ALLLOWER, ALLUPPER, PRESERVE;
 
     /**
      * Converts string to the constant enum.
@@ -429,11 +429,9 @@
     }
     String token = s;
     switch (caseConversionType) {
-    case NONE: // preserve case
-      /* 
-        
-        temporary commented out until we agree if this is correct
-        
+    case NONE: 
+       break;
+    case PRESERVE:
       if (StringTools.startsWithUppercase(sample)) {
         if (StringTools.isAllUppercase(formattedToken.getToken())) {
           token =  token.toUpperCase();
@@ -441,7 +439,6 @@
           token = StringTools.uppercaseFirstChar(token);
         }
       }
-      */ 
       break;
     case STARTLOWER:
       token = token.substring(0, 1).toLowerCase() + token.substring(1);

Modified: trunk/JLanguageTool/src/rules/pattern.xsd
===================================================================
--- trunk/JLanguageTool/src/rules/pattern.xsd   2012-01-12 12:28:59 UTC (rev 
6229)
+++ trunk/JLanguageTool/src/rules/pattern.xsd   2012-01-12 12:31:36 UTC (rev 
6230)
@@ -119,6 +119,7 @@
                                                <xs:enumeration 
value="startupper" />
                                                <xs:enumeration 
value="allupper" />
                                                <xs:enumeration 
value="alllower" />
+                                               <xs:enumeration 
value="preserve" />
                                        </xs:restriction>
                                </xs:simpleType>
                        </xs:attribute>

Added: 
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java 
                        (rev 0)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/MatchTest.java 
2012-01-12 12:31:36 UTC (rev 6230)
@@ -0,0 +1,176 @@
+/* LanguageTool, a natural language style checker 
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ * 
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules.patterns;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import junit.framework.TestCase;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+
+import org.languagetool.AnalyzedToken;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.rules.patterns.Match.CaseConversion;
+import org.languagetool.rules.patterns.Match.IncludeRange;
+import org.languagetool.synthesis.BaseSynthesizer;
+import org.languagetool.synthesis.ManualSynthesizer;
+import org.languagetool.synthesis.Synthesizer;
+
+/**
+ * Test class for {@link Match}.
+ * @author Ionuț Păduraru
+ */
+public class MatchTest extends TestCase {
+
+       /**
+        *  Adapter from {@link ManualSynthesizer} to {@link Synthesizer}. 
<br/> 
+        *  Note: This could be extracted as a standalone class.
+        */
+       public static class ManualSynthesizerAdapter extends BaseSynthesizer 
implements Synthesizer  {
+               private ManualSynthesizer manualSynthesizer;
+               public ManualSynthesizerAdapter(ManualSynthesizer 
manualSynthesizer) {
+                       super(null, null); // no file
+                       this.manualSynthesizer = manualSynthesizer;
+               }
+               @Override
+               protected void initSynthesizer() throws IOException {
+                       synthesizer = new IStemmer() { // null synthesiser 
+                               @Override
+                               public List<WordData> lookup(CharSequence word) 
{
+                                       return new ArrayList<WordData>();
+                               }
+                       };
+               }
+               @Override
+               protected void initPossibleTags() throws IOException {
+                       if (possibleTags == null) {
+                               possibleTags = new 
ArrayList<String>(manualSynthesizer.getPossibleTags());
+                       }
+               }
+               @Override
+               protected void lookup(String lemma, String posTag, List<String> 
results) {
+                       super.lookup(lemma, posTag, results);
+                       // add words that are missing from the 
romanian_synth.dict file
+                       final List<String> manualForms = 
manualSynthesizer.lookup(lemma.toLowerCase(), posTag);
+                       if (manualForms != null) {
+                               results.addAll(manualForms); 
+                       }
+               }
+       }
+       
+       //-- helper methods
+       
+       private Synthesizer getTestSynthesizer() throws 
UnsupportedEncodingException, IOException {
+                final String data = 
+                             "# some test data\n" +
+                             "inflectedform11\tlemma1\tPOS1\n" +
+                             "inflectedform121\tlemma1\tPOS2\n" +
+                             "inflectedform122\tlemma1\tPOS2\n" +
+                             "inflectedform2\tlemma2\tPOS1\n"
+                             ;
+               return new ManualSynthesizerAdapter(new ManualSynthesizer(new 
ByteArrayInputStream(data.getBytes("UTF-8"))));
+       }
+
+       private AnalyzedTokenReadings getAnalyzedTokenReadings(String token, 
String posTag, String lemma) {
+               return new AnalyzedTokenReadings(new AnalyzedToken(token, 
posTag, lemma), 0);
+       }
+
+       private Match getMatch(String posTag, String posTagReplace, 
CaseConversion caseConversion) throws UnsupportedEncodingException, IOException 
{
+               Match match = new Match(posTag, posTagReplace, true, null, 
null, caseConversion, false, IncludeRange.NONE);
+               match.setSynthesizer(getTestSynthesizer());
+               return match;
+       }
+
+       //-- test methods
+       
+               //-- CASE CONVERSION
+       
+       public void testStartUpper() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.STARTUPPER);
+               match.setToken(getAnalyzedTokenReadings("inflectedform11", 
"POS1", "Lemma1"));
+               assertEquals("[Inflectedform121, Inflectedform122]", 
Arrays.toString( match.toFinalString()));
+       }
+
+       public void testStartLower() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.STARTLOWER);
+               match.setToken(getAnalyzedTokenReadings("InflectedForm11", 
"POS1", "Lemma1"));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+       }
+
+       public void testAllUpper() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.ALLUPPER);
+               match.setToken(getAnalyzedTokenReadings("InflectedForm11", 
"POS1", "Lemma1"));
+               assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", 
Arrays.toString(match.toFinalString()));
+       }
+
+       public void testAllLower() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.ALLLOWER);
+               match.setToken(getAnalyzedTokenReadings("InflectedForm11", 
"POS1", "Lemma1"));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+       }
+
+       public void testPreserveStartUpper() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.PRESERVE);
+               match.setToken(getAnalyzedTokenReadings("InflectedForm11", 
"POS1", "Lemma1"));
+               assertEquals("[Inflectedform121, Inflectedform122]", 
Arrays.toString(match.toFinalString()));
+       }
+       
+       public void testPreserveStartLower() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.PRESERVE);
+               match.setToken(getAnalyzedTokenReadings("inflectedForm11", 
"POS1", "Lemma1"));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+       }
+       
+       public void testPreserveAllUpper() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.PRESERVE);
+               match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11", 
"POS1", "Lemma1"));
+               assertEquals("[INFLECTEDFORM121, INFLECTEDFORM122]", 
Arrays.toString( match.toFinalString()));
+       }
+       
+       public void testPreserveMixed() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.PRESERVE);
+               match.setToken(getAnalyzedTokenReadings("inflecTedForm11", 
"POS1", "Lemma1"));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+               
+       }
+
+       public void testPreserveNoneUpper() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.NONE);
+               match.setToken(getAnalyzedTokenReadings("INFLECTEDFORM11", 
"POS1", "Lemma1"));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+       }
+       
+       public void testPreserveNoneLower() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.NONE);
+               match.setToken(getAnalyzedTokenReadings("inflectedform11", 
"POS1", "Lemma1"));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+       }
+       
+       public void testPreserveNoneMixed() throws Exception {
+               Match match = getMatch("POS1", "POS2", 
Match.CaseConversion.NONE);
+               match.setToken(getAnalyzedTokenReadings("inFLectedFOrm11", 
"POS1", "Lemma1"));
+               assertEquals("[inflectedform121, inflectedform122]", 
Arrays.toString(match.toFinalString()));
+       }
+
+}

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
RSA(R) Conference 2012
Feb 27 - Mar 2
Save $400 by Jan. 27
Register now!
http://p.sf.net/sfu/rsa-sfdev2dev2
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

[LanguageTool] SF.net SVN: languagetool:[6230] trunk/JLanguageTool/src

Reply via email to