Revision: 7511
          http://languagetool.svn.sourceforge.net/languagetool/?rev=7511&view=rev
Author:   milek_pl
Date:     2012-06-26 10:18:59 +0000 (Tue, 26 Jun 2012)
Log Message:
-----------
fix outstanding problems with ca dictionary, add a simple tool to tokenize 
wordlists

Modified Paths:
--------------
    trunk/JLanguageTool/build.xml
    trunk/JLanguageTool/src/resource/ca/hunspell/ca_ES.dict
    trunk/JLanguageTool/src/test/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java

Added Paths:
-----------
    trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java

Modified: trunk/JLanguageTool/build.xml
===================================================================
--- trunk/JLanguageTool/build.xml       2012-06-26 08:05:38 UTC (rev 7510)
+++ trunk/JLanguageTool/build.xml       2012-06-26 10:18:59 UTC (rev 7511)
@@ -859,4 +859,19 @@
        
 </target>
 
+       <!-- ================================================================== 
-->
+       <!-- WordTokenizer - just a helper app, packaged as its own runnable jar -->
+       <!-- ================================================================== 
-->             
+               
+       <target name="wtokenizer" depends="common" description="Builds a word 
tokenizer">
+               <!-- Jars org/languagetool/dev/** from ${build.classes} into ${dist.dir}/wordtokenizer.jar; the manifest Class-Path entries are relative to the jar location. -->
+       <jar jarfile="${dist.dir}/wordtokenizer.jar" basedir="${build.classes}" 
includes="org/languagetool/dev/**">
+               <manifest>
+                       <attribute name="Main-Class" 
value="org.languagetool.dev.WordTokenizer" />
+                       <attribute name="Class-Path" value="./ 
./LanguageTool.jar ./${segment.lib} ./${logging.lib}" />
+               </manifest>
+       </jar>
+       
+       </target>
+       
 </project>

Added: trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java       (rev 0)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java       2012-06-26 10:18:59 UTC (rev 7511)
@@ -0,0 +1,80 @@
+/* LanguageTool, a natural language style checker 
+* Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+* 
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+* USA
+*/
+
+package org.languagetool.dev;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
+
+
+/**
+ * Used for tokenizing word lists for the MorfologikSpeller.
+ * Reads System.in line by line and writes the tokens of each line to System.out, one per line.
+ * @author Marcin Miłkowski
+ */
+public final class WordTokenizer {
+
+    /** Entry point; expects exactly one argument, the language code (otherwise exits with -1). */
+     public static void main(final String[] args) throws IOException {
+        final WordTokenizer prg = new WordTokenizer();
+        if (args.length != 1) {
+            System.err.println("Please supply the language code as the only 
argument.");
+            System.exit(-1);
+        }
+        prg.run(args[0]);
+      }
+      /** Tokenizes System.in with a JLanguageTool built for {@code lang}; an IOException is reported and rethrown. */
+      private void run(final String lang) throws IOException {
+        // NOTE(review): reader and writer below are created without a charset, so the platform default applies - confirm intended.
+        JLanguageTool langTool = new JLanguageTool(
+                Language.getLanguageForShortName(lang));
+        BufferedReader in = null; // declared outside the try so the finally block can close it
+        try {
+            in = new BufferedReader(new InputStreamReader(System.in));
+            BufferedWriter out = new BufferedWriter(new 
OutputStreamWriter(System.out));
+            String line;
+            while ((line = in.readLine()) != null) { // one input line is analyzed at a time
+                AnalyzedTokenReadings[] atr = 
langTool.getRawAnalyzedSentence(line).
+                        getTokensWithoutWhitespace();
+                for (AnalyzedTokenReadings a : atr) {
+                    out.write(a.getToken());
+                    out.write("\n"); // each token goes on its own output line
+                }
+                // NOTE(review): out is never flushed or closed in this method, so buffered tokens may never reach System.out - confirm.
+            }
+        }
+        catch (IOException e) { // report on stderr, then propagate to the caller
+            System.err.println("IOException reading System.in" + e); // NOTE(review): no separator between the text and the exception
+            throw e;
+        }
+        finally {
+            if (in != null) { // only the reader is closed here
+                in.close();
+            }
+        }      
+    }
+
+}


Property changes on: trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java
___________________________________________________________________
Added: svn:mime-type
   + text/plain

Modified: trunk/JLanguageTool/src/resource/ca/hunspell/ca_ES.dict
===================================================================
(Binary files differ)

Modified: trunk/JLanguageTool/src/test/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java       2012-06-26 08:05:38 UTC (rev 7510)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java       2012-06-26 10:18:59 UTC (rev 7511)
@@ -45,10 +45,10 @@
         assertEquals(0, 
rule.match(langTool.getAnalyzedSentence("Abacallanada")).length);
         assertEquals(0, 
rule.match(langTool.getAnalyzedSentence("Abatre-les-en")).length);
         
-        //assertEquals(0, rule.match(langTool.getAnalyzedSentence("Allò que 
més l'interessa.")).length);
+        assertEquals(0, rule.match(langTool.getAnalyzedSentence("Allò que més 
l'interessa.")).length);
         // checks that "WORDCHARS ·-'" is added to Hunspell .aff file
-        // assertEquals(0, rule.match(langTool.getAnalyzedSentence("Porta'n 
quatre al col·legi.")).length);
-//        assertEquals(0, rule.match(langTool.getAnalyzedSentence("Has de 
portar-me'n moltes.")).length);
+        assertEquals(0, rule.match(langTool.getAnalyzedSentence("Porta'n 
quatre al col·legi.")).length);
+        assertEquals(0, rule.match(langTool.getAnalyzedSentence("Has de 
portar-me'n moltes.")).length);
         assertEquals(0, rule.match(langTool.getAnalyzedSentence(",")).length);
         
         
@@ -72,7 +72,7 @@
         assertEquals(1, matches.length);
         assertEquals(0, matches[0].getFromPos());
         assertEquals(5, matches[0].getToPos());
-       // assertEquals("Pera", matches[0].getSuggestedReplacements().get(0));
+        assertEquals("Pera", matches[0].getSuggestedReplacements().get(2));
         
         assertEquals(1, 
rule.match(langTool.getAnalyzedSentence("aõh")).length);
         assertEquals(0, rule.match(langTool.getAnalyzedSentence("a")).length);

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to