Revision: 7511
http://languagetool.svn.sourceforge.net/languagetool/?rev=7511&view=rev
Author: milek_pl
Date: 2012-06-26 10:18:59 +0000 (Tue, 26 Jun 2012)
Log Message:
-----------
fix outstanding problems with ca dictionary, add a simple tool to tokenize
wordlists
Modified Paths:
--------------
trunk/JLanguageTool/build.xml
trunk/JLanguageTool/src/resource/ca/hunspell/ca_ES.dict
trunk/JLanguageTool/src/test/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
Added Paths:
-----------
trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java
Modified: trunk/JLanguageTool/build.xml
===================================================================
--- trunk/JLanguageTool/build.xml 2012-06-26 08:05:38 UTC (rev 7510)
+++ trunk/JLanguageTool/build.xml 2012-06-26 10:18:59 UTC (rev 7511)
@@ -859,4 +859,19 @@
</target>
+ <!-- ================================================================== -->
+ <!-- WordTokenizer - just a helper app -->
+ <!-- ================================================================== -->
+
+ <target name="wtokenizer" depends="common" description="Builds a word
tokenizer">
+ <!-- Packages only the org/languagetool/dev classes from ${build.classes} into ${dist.dir}/wordtokenizer.jar. The manifest starts org.languagetool.dev.WordTokenizer and lists LanguageTool.jar plus the segment and logging libraries on the Class-Path as relative ./ entries, so they presumably have to be deployed alongside the jar. -->
+ <jar jarfile="${dist.dir}/wordtokenizer.jar" basedir="${build.classes}"
includes="org/languagetool/dev/**">
+ <manifest>
+ <attribute name="Main-Class"
value="org.languagetool.dev.WordTokenizer" />
+ <attribute name="Class-Path" value="./
./LanguageTool.jar ./${segment.lib} ./${logging.lib}" />
+ </manifest>
+ </jar>
+
+ </target>
+
</project>
Added: trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java
===================================================================
--- trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java
(rev 0)
+++ trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java
2012-06-26 10:18:59 UTC (rev 7511)
@@ -0,0 +1,80 @@
+/* LanguageTool, a natural language style checker
+* Copyright (C) 2006 Daniel Naber (http://www.danielnaber.de)
+*
+* This library is free software; you can redistribute it and/or
+* modify it under the terms of the GNU Lesser General Public
+* License as published by the Free Software Foundation; either
+* version 2.1 of the License, or (at your option) any later version.
+*
+* This library is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+* Lesser General Public License for more details.
+*
+* You should have received a copy of the GNU Lesser General Public
+* License along with this library; if not, write to the Free Software
+* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+* USA
+*/
+
+package org.languagetool.dev;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
+
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
+
+
+/**
+ * Used for tokenizing word lists for the MorfologikSpeller.
+ *
+ * @author Marcin Miłkowski
+ */
+public final class WordTokenizer {
+
+  /**
+   * Writes each non-whitespace token of the text read from standard input on its own line.
+   * @param args exactly one element: the short language code
+   * @throws IOException if reading the input or writing the tokens fails
+   */
+  public static void main(final String[] args) throws IOException {
+    if (args.length != 1) {
+      System.err.println("Please supply the language code as the only argument.");
+      System.exit(-1);
+    }
+    new WordTokenizer().run(args[0]);
+  }
+
+  /** Tokenizes System.in line by line using the language looked up by its short code. */
+  private void run(final String lang) throws IOException {
+    final JLanguageTool langTool = new JLanguageTool(Language.getLanguageForShortName(lang));
+    final BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+    final BufferedWriter out = new BufferedWriter(new OutputStreamWriter(System.out));
+    try {
+      String line;
+      while ((line = in.readLine()) != null) {
+        final AnalyzedTokenReadings[] tokens =
+            langTool.getRawAnalyzedSentence(line).getTokensWithoutWhitespace();
+        for (final AnalyzedTokenReadings token : tokens) {
+          out.write(token.getToken());
+          out.write("\n");
+        }
+      }
+    } catch (IOException e) {
+      System.err.println("IOException reading System.in: " + e);
+      throw e;
+    } finally {
+      try {
+        // BufferedWriter never flushes on its own at JVM exit, so flush explicitly.
+        out.flush();
+      } finally {
+        in.close();
+      }
+    }
+  }
+}
Property changes on:
trunk/JLanguageTool/src/dev/org/languagetool/dev/WordTokenizer.java
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Modified: trunk/JLanguageTool/src/resource/ca/hunspell/ca_ES.dict
===================================================================
(Binary files differ)
Modified:
trunk/JLanguageTool/src/test/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
2012-06-26 08:05:38 UTC (rev 7510)
+++
trunk/JLanguageTool/src/test/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
2012-06-26 10:18:59 UTC (rev 7511)
@@ -45,10 +45,10 @@
assertEquals(0,
rule.match(langTool.getAnalyzedSentence("Abacallanada")).length);
assertEquals(0,
rule.match(langTool.getAnalyzedSentence("Abatre-les-en")).length);
- //assertEquals(0, rule.match(langTool.getAnalyzedSentence("Allò que
més l'interessa.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Allò que més
l'interessa.")).length);
// checks that "WORDCHARS ·-'" is added to Hunspell .aff file
- // assertEquals(0, rule.match(langTool.getAnalyzedSentence("Porta'n
quatre al col·legi.")).length);
-// assertEquals(0, rule.match(langTool.getAnalyzedSentence("Has de
portar-me'n moltes.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Porta'n
quatre al col·legi.")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Has de
portar-me'n moltes.")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence(",")).length);
@@ -72,7 +72,7 @@
assertEquals(1, matches.length);
assertEquals(0, matches[0].getFromPos());
assertEquals(5, matches[0].getToPos());
- // assertEquals("Pera", matches[0].getSuggestedReplacements().get(0));
+ assertEquals("Pera", matches[0].getSuggestedReplacements().get(2));
assertEquals(1,
rule.match(langTool.getAnalyzedSentence("aõh")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence("a")).length);
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs