Revision: 10033
http://sourceforge.net/p/languagetool/code/10033
Author: arysin
Date: 2013-05-02 02:09:11 +0000 (Thu, 02 May 2013)
Log Message:
-----------
Ukrainian tokenizer, dictionary, and grammar updates
New tests for Ukrainian module
Modified Paths:
--------------
trunk/languagetool/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx
trunk/languagetool/languagetool-language-modules/uk/src/main/java/org/languagetool/rules/uk/SimpleReplaceRule.java
trunk/languagetool/languagetool-language-modules/uk/src/main/java/org/languagetool/tokenizers/uk/UkrainianWordTokenizer.java
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/src/make-dict-uk-mfl.sh
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/ukrainian.dict
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/ukrainian_synth.dict
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/rules/uk/grammar.xml
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/rules/uk/replace.txt
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/rules/uk/SimpleReplaceRuleTest.java
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/tokenizers/uk/UkrainianSRXSentenceTokenizerTest.java
Added Paths:
-----------
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRuleTest.java
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/tokenizers/uk/UkrainianWordTokenizerTest.java
Modified:
trunk/languagetool/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx
===================================================================
---
trunk/languagetool/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx
2013-05-01 22:44:07 UTC (rev 10032)
+++
trunk/languagetool/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx
2013-05-02 02:09:11 UTC (rev 10033)
@@ -4748,10 +4748,10 @@
<beforebreak>\b\d+\.\s</beforebreak>
<afterbreak>\p{Ll}|\p{Lu}{2,}</afterbreak>
</rule>
-<!-- TODO: Name.Surname - does not work for some reason -->
+<!-- N.Surname -->
<rule break="no">
-<beforebreak>\b[А-ЯІЇЄҐ]\.[А-ЯІЇЄҐ][а-яіїєґ]+\s</beforebreak>
-<afterbreak></afterbreak>
+<beforebreak>\b[А-ЯІЇЄҐ]\.</beforebreak>
+<afterbreak>[А-ЯІЇЄҐ][а-яіїєґ'-]+</afterbreak>
</rule>
<!-- capital char abbreviations А. Б. В. -->
<rule break="no">
Modified:
trunk/languagetool/languagetool-language-modules/uk/src/main/java/org/languagetool/rules/uk/SimpleReplaceRule.java
===================================================================
---
trunk/languagetool/languagetool-language-modules/uk/src/main/java/org/languagetool/rules/uk/SimpleReplaceRule.java
2013-05-01 22:44:07 UTC (rev 10032)
+++
trunk/languagetool/languagetool-language-modules/uk/src/main/java/org/languagetool/rules/uk/SimpleReplaceRule.java
2013-05-02 02:09:11 UTC (rev 10033)
@@ -19,12 +19,27 @@
package org.languagetool.rules.uk;
import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
import java.util.ResourceBundle;
+import java.util.Scanner;
-import org.languagetool.rules.AbstractSimpleReplaceRule;
+import org.apache.commons.lang.StringUtils;
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.JLanguageTool;
+import org.languagetool.rules.Category;
+import org.languagetool.rules.Rule;
+import org.languagetool.rules.RuleMatch;
+import org.languagetool.tools.StringTools;
/**
- * A rule that matches words or phrases which should not be used and suggests
+ * A rule that matches words which should not be used and suggests
* correct ones instead.
*
* Ukrainian implementations. Loads the
@@ -32,16 +47,22 @@
*
* @author Andriy Rysin
*/
-public class SimpleReplaceRule extends AbstractSimpleReplaceRule {
+public class SimpleReplaceRule extends Rule {
+ private static final String FILE_ENCODING = "utf-8";
private static final String FILE_NAME = "/uk/replace.txt";
- @Override
+ private final Map<String, List<String>> wrongWords;
+
public final String getFileName() {
return FILE_NAME;
}
+
public SimpleReplaceRule(final ResourceBundle messages) throws IOException {
- super(messages);
+ if (messages != null) {
+ super.setCategory(new Category(messages.getString("category_misc")));
+ }
+ wrongWords = loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(getFileName()));
}
@Override
@@ -54,15 +75,99 @@
return "Пошук помилкових слів";
}
- @Override
public String getShort() {
return "Помилка?";
}
- @Override
public String getSuggestion() {
return " - помилкове слово, виправлення: ";
}
+ /**
+ * Indicates if the rule is case-sensitive.
+ * @return true if the rule is case-sensitive, false otherwise.
+ */
+ public boolean isCaseSensitive() {
+ return false;
+ }
+
+ /**
+ * @return the locale used for case conversion when {@link #isCaseSensitive()} is set to <code>false</code>.
+ */
+ public Locale getLocale() {
+ return Locale.getDefault();
+ }
+
+ public String getEncoding() {
+ return FILE_ENCODING;
+ }
+ @Override
+ public final RuleMatch[] match(final AnalyzedSentence text) {
+ List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+ AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+
+ for (AnalyzedTokenReadings tokenReadings: tokens) {
+ String tokenString = tokenReadings.getToken();
+
+ List<String> replacements = isCaseSensitive() ? wrongWords.get(tokenString) : wrongWords.get(tokenString.toLowerCase(getLocale()));
+
+ if (replacements != null && replacements.size() > 0 ) {
+ RuleMatch potentialRuleMatch = createRuleMatch(tokenReadings, replacements);
+ ruleMatches.add(potentialRuleMatch);
+ }
+ }
+ return toRuleMatchArray(ruleMatches);
+ }
+
+ private RuleMatch createRuleMatch(AnalyzedTokenReadings tokenReadings, List<String> replacements) {
+ String tokenString = tokenReadings.getToken();
+ String origToken = tokenString;
+ String msg = tokenString + getSuggestion() + StringUtils.join(replacements, ", ");
+ int pos = tokenReadings.getStartPos();
+
+ RuleMatch potentialRuleMatch = new RuleMatch(this, pos, pos + origToken.length(), msg, getShort());
+
+ if (!isCaseSensitive() && StringTools.startsWithUppercase(tokenString)) {
+ for(int i = 0; i < replacements.size(); i++) {
+ replacements.set(i, StringTools.uppercaseFirstChar(replacements.get(i)));
+ }
+ }
+
+ potentialRuleMatch.setSuggestedReplacements(replacements);
+
+ return potentialRuleMatch;
+ }
+
+ private Map<String, List<String>> loadWords(final InputStream stream) throws IOException {
+ Map<String, List<String>> map = new HashMap<String, List<String>>();
+ Scanner scanner = new Scanner(stream, getEncoding());
+
+ try {
+ while (scanner.hasNextLine()) {
+ String line = scanner.nextLine();
+ if (line.length() < 1 || line.charAt(0) == '#') { // # = comment
+ continue;
+ }
+
+ String[] parts = line.split("=");
+ if (parts.length != 2) {
+ throw new IOException("Format error in file "
+ + JLanguageTool.getDataBroker().getFromRulesDirAsUrl(getFileName()) + ", line: " + line);
+ }
+
+ String[] replacements = parts[1].split("\\|");
+
+ map.put(parts[0], Arrays.asList(replacements));
+ }
+ } finally {
+ scanner.close();
+ }
+ return map;
+ }
+
+ @Override
+ public void reset() {
+ }
+
}
Modified:
trunk/languagetool/languagetool-language-modules/uk/src/main/java/org/languagetool/tokenizers/uk/UkrainianWordTokenizer.java
===================================================================
---
trunk/languagetool/languagetool-language-modules/uk/src/main/java/org/languagetool/tokenizers/uk/UkrainianWordTokenizer.java
2013-05-01 22:44:07 UTC (rev 10032)
+++
trunk/languagetool/languagetool-language-modules/uk/src/main/java/org/languagetool/tokenizers/uk/UkrainianWordTokenizer.java
2013-05-02 02:09:11 UTC (rev 10033)
@@ -43,9 +43,11 @@
}
@Override
- public List<String> tokenize(final String text) {
- final List<String> tokenList = new ArrayList<String>();
- final StringTokenizer st = new StringTokenizer(text, SPLIT_CHARS, true);
+ public List<String> tokenize(String text) {
+ text = cleanupSentence(text);
+
+ List<String> tokenList = new ArrayList<String>();
+ StringTokenizer st = new StringTokenizer(text, SPLIT_CHARS, true);
while (st.hasMoreElements()) {
tokenList.add( clean(st.nextToken()) );
@@ -54,6 +56,11 @@
return tokenList;
}
+ // remove name abbreviation from name+surname, e.g. Т.Шевченко
+ private String cleanupSentence(String text) {
+ return text.replaceAll("(\\s)[А-ЯІЇЄҐ]\\.([А-ЯІЇЄҐ]\\.)?([А-ЯІЇЄҐ][а-яіїєґ'-]+)", "$1$3");
+ }
+
private static String clean(String token) {
return token.replace("\u0301", "").replace('’', '\'').replace('ʼ', '\'');
}
Modified:
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/src/make-dict-uk-mfl.sh
===================================================================
---
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/src/make-dict-uk-mfl.sh
2013-05-01 22:44:07 UTC (rev 10032)
+++
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/src/make-dict-uk-mfl.sh
2013-05-02 02:09:11 UTC (rev 10033)
@@ -17,7 +17,7 @@
echo "Generating synthesizer dictionary"
-awk -F '\t' '{print $2"|"$3"\t"$1"\t"}' all.tagged.tmp | \
+grep -v ":bad" all.tagged.tmp | awk -F '\t' '{print $2"|"$3"\t"$1"\t"}' | \
$MFL_CMD tab2morph | \
$MFL_CMD fsa_build $FSA_FLAGS -o ukrainian_synth.dict
Modified:
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/ukrainian.dict
===================================================================
(Binary files differ)
Modified:
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/resource/uk/ukrainian_synth.dict
===================================================================
(Binary files differ)
Modified:
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/rules/uk/grammar.xml
===================================================================
---
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/rules/uk/grammar.xml
2013-05-01 22:44:07 UTC (rev 10032)
+++
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/rules/uk/grammar.xml
2013-05-02 02:09:11 UTC (rev 10033)
@@ -125,12 +125,13 @@
</rule>
<rule>
<pattern case_sensitive="yes">
- <token negate="yes" regexp="yes">[\p{Punct}–—\(«АаІі]|Це<exception postag="SENT_START"></exception></token>
+ <token negate="yes" regexp="yes">[\p{Punct}–—\(«АаІі]|[Цц]е<exception postag="SENT_START"></exception></token>
<token regexp="yes">по-перше|по-друге|по-третє</token>
+ <token negate="yes">ж</token>
</pattern>
<message>Відсутня ліва кома: <suggestion>\1,
\2</suggestion>.</message>
<example type="correct">Це по-перше.</example>
- <example type="incorrect">По-перше, <marker>нашпигувати
по-друге</marker>, запекти.</example>
+ <example type="incorrect">По-перше, треба його
<marker>нашпигувати по-друге,</marker> запекти.</example>
</rule>
</rulegroup>
@@ -159,6 +160,7 @@
<pattern>
<token postag_regexp="yes" postag="pryim.*">
<exception negate_pos="yes" postag_regexp="yes"
postag="pryim.*" />
+ <exception>крім</exception>
</token>
<token postag_regexp="yes" postag="verb.*">
<exception negate_pos="yes" postag_regexp="yes"
postag="verb.*" />
@@ -169,6 +171,7 @@
<example type="correct">Їм треба в'їхати.</example>
<example type="correct">Просто взяти.</example>
<example type="correct">Просто неба.</example>
+ <example type="correct">не залишається нічого іншого, крім
чекати.</example>
<example type="incorrect">Їм треба <marker>в
їхати</marker>.</example>
</rule>
<rule>
@@ -207,6 +210,42 @@
<example type="incorrect">згідно <marker>до</marker>
правила</example>
</rule>
</rulegroup>
+
+ <rulegroup name="Пасивний предикат або пасивний атрибут (ev. апозиція)
з інструменталем дієвої особи" id="PASSIVE_PREDICATE">
+ <rule>
+ <pattern>
+ <token postag_regexp="yes" postag="(noun|pron).*v_zna"/>
+ <marker>
+ <token postag="impers"/> <!-- TODO: add skip adj -->
+ <token postag_regexp="yes" postag="(noun|pron).*v_oru"/>
+ </marker>
+ </pattern>
+ <message>Невластива мові конструкція: пасивний
предикат</message>
+ <url>http://kurylo.wikidot.com/3-nevlastyva-ukrainskii-movi-konstruktsiia:pasyvnyi-predykat</url>
+ <example type="correct">Користувача авторизовано</example>
+ <example type="correct">Справу порушено за фактом</example>
+ <example type="incorrect">Справу <marker>порушено
судом</marker></example>
+ <!--
+ <example type="incorrect">Справу <marker>порушено міським
судом</marker></example>
+ -->
+ <example type="incorrect">Користувача <marker>авторизовано
адміністратором</marker></example>
+ </rule>
+ <!--
+ <rule>
+ <pattern>
+ <token postag_regexp="yes" postag="(noun|pron).*(v_oru)"/>
+ <marker>
+ <token postag_regexp="yes" postag="verb:rev.*"/>
+ <token postag_regexp="yes" postag="(noun|pron).*v_zna"/>
+ </marker>
+ </pattern>
+ <message>Невластива мові конструкція: пасивний
предикат</message>
+ <url>http://kurylo.wikidot.com/3-nevlastyva-ukrainskii-movi-konstruktsiia:pasyvnyi-predykat</url>
+ <example type="correct">Видається багато книжок</example>
+ <example type="incorrect">Ним <marker>видаються
книжки</marker></example>
+ </rule>
+ -->
+ </rulegroup>
<!--
<rulegroup name="Узгодженість" id="CONISTENCY_VIDM_1">
<rule>
Modified:
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/rules/uk/replace.txt
===================================================================
---
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/rules/uk/replace.txt
2013-05-01 22:44:07 UTC (rev 10032)
+++
trunk/languagetool/languagetool-language-modules/uk/src/main/resources/org/languagetool/rules/uk/replace.txt
2013-05-02 02:09:11 UTC (rev 10033)
@@ -1,15 +1,21 @@
-# TODO: додати підтримку відмінювання
-# TODO: додати підтримку фраз
+# Format:
+# word=suggestion
+# or
+# word=suggestion1|suggestion2|suggestion3...
+#
+# TODO: add inflection support
# незмінювані
накінець=нарешті
# лексеми
+атакуючий=атакувальний|нападний
багатократний=багаторазовий
висок=скроня
гордитись=пишатись
лишнiй=зайвий
мiроприємство=захід
+нападаючий=нападник|нападальний|нападний
насморк=нежить
оточуючий=навколишній
поясниця=поперек
Added:
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRuleTest.java
===================================================================
---
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRuleTest.java
(rev 0)
+++
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRuleTest.java
2013-05-02 02:09:11 UTC (rev 10033)
@@ -0,0 +1,58 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Marcin Miłkowski
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+package org.languagetool.rules.uk;
+
+import org.junit.Test;
+import org.languagetool.JLanguageTool;
+import org.languagetool.TestTools;
+import org.languagetool.language.Ukrainian;
+import org.languagetool.rules.RuleMatch;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+public class MorfologikUkrainianSpellerRuleTest {
+
+ @Test
+ public void testMorfologikSpeller() throws IOException {
+ final MorfologikUkrainianSpellerRule rule = new
MorfologikUkrainianSpellerRule (TestTools.getMessages("Ukrainian"), new
Ukrainian());
+
+ final JLanguageTool langTool = new JLanguageTool(new Ukrainian());
+
+ // correct sentences:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("До вас прийде
заввідділу!")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence(",")).length);
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("123454")).length);
+
+ //incorrect sentences:
+
+ RuleMatch[] matches =
rule.match(langTool.getAnalyzedSentence("атакуючий"));
+ // check match positions:
+ assertEquals(1, matches.length);
+
+ matches = rule.match(langTool.getAnalyzedSentence("шкляний"));
+
+ assertEquals(1, matches.length);
+ assertEquals("скляний", matches[0].getSuggestedReplacements().get(0));
+
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("а")).length);
+ }
+
+}
Property changes on:
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRuleTest.java
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
Modified:
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/rules/uk/SimpleReplaceRuleTest.java
===================================================================
---
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/rules/uk/SimpleReplaceRuleTest.java
2013-05-01 22:44:07 UTC (rev 10032)
+++
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/rules/uk/SimpleReplaceRuleTest.java
2013-05-02 02:09:11 UTC (rev 10033)
@@ -26,6 +26,7 @@
import org.languagetool.rules.RuleMatch;
import java.io.IOException;
+import java.util.Arrays;
public class SimpleReplaceRuleTest extends TestCase {
@@ -44,6 +45,11 @@
matches = rule.match(langTool.getAnalyzedSentence("Ці рядки
повинні співпадати."));
assertEquals(1, matches.length);
assertEquals(1, matches[0].getSuggestedReplacements().size());
- assertEquals("збігатися",
matches[0].getSuggestedReplacements().get(0));
+ assertEquals(Arrays.asList("збігатися"),
matches[0].getSuggestedReplacements());
+
+ matches =
rule.match(langTool.getAnalyzedSentence("Нападаючий"));
+ assertEquals(1, matches.length);
+ assertEquals(Arrays.asList("Нападник", "Нападальний",
"Нападний"), matches[0].getSuggestedReplacements());
+
}
}
Modified:
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/tokenizers/uk/UkrainianSRXSentenceTokenizerTest.java
===================================================================
---
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/tokenizers/uk/UkrainianSRXSentenceTokenizerTest.java
2013-05-01 22:44:07 UTC (rev 10032)
+++
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/tokenizers/uk/UkrainianSRXSentenceTokenizerTest.java
2013-05-02 02:09:11 UTC (rev 10033)
@@ -41,8 +41,7 @@
// testSplit("На початок 1994 р. державний борг України становив 4,8 млрд. ", "Досить значна сума.");
testSplit("Київ, вул. Сагайдачного, буд. 43, кв. 4.");
testSplit("Наша зустріч з А. Марчуком відбулася в грудні минулого року.");
-// TODO:
-// testSplit("Наша зустріч з А.Марчуком відбулася в грудні минулого року.");
+ testSplit("Наша зустріч з А.Марчуком відбулася в грудні минулого року.");
}
private void testSplit(final String... sentences) {
Added:
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/tokenizers/uk/UkrainianWordTokenizerTest.java
===================================================================
---
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/tokenizers/uk/UkrainianWordTokenizerTest.java
(rev 0)
+++
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/tokenizers/uk/UkrainianWordTokenizerTest.java
2013-05-02 02:09:11 UTC (rev 10033)
@@ -0,0 +1,44 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
+ * USA
+ */
+
+package org.languagetool.tokenizers.uk;
+
+import java.util.Arrays;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+public class UkrainianWordTokenizerTest extends TestCase {
+ private final UkrainianWordTokenizer w = new UkrainianWordTokenizer();
+
+ public void testTokenize() {
+ List<String> testList = w.tokenize("Вони прийшли додому.");
+ assertEquals(Arrays.asList("Вони", " ", "прийшли", " ", "додому", "."),
testList);
+
+ testList = w.tokenize("Вони\u0301 прийшли пʼятими зів’ялими.");
+ assertEquals(Arrays.asList("Вони", " ", "прийшли", " ", "п'ятими", " ",
"зів'ялими", "."), testList);
+
+ testList = w.tokenize("Засідав І.Єрмолюк.");
+ assertEquals(Arrays.asList("Засідав", " ", "Єрмолюк", "."), testList);
+
+ testList = w.tokenize("Засідав І.П.Єрмолюк.");
+ assertEquals(Arrays.asList("Засідав", " ", "Єрмолюк", "."), testList);
+ }
+
+}
Property changes on:
trunk/languagetool/languagetool-language-modules/uk/src/test/java/org/languagetool/tokenizers/uk/UkrainianWordTokenizerTest.java
___________________________________________________________________
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Introducing AppDynamics Lite, a free troubleshooting tool for Java/.NET
Get 100% visibility into your production application - at no cost.
Code-level diagnostics for performance bottlenecks with <2% overhead
Download for free and get started troubleshooting in minutes.
http://p.sf.net/sfu/appdyn_d2d_ap1
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits