Revision: 9046
http://languagetool.svn.sourceforge.net/languagetool/?rev=9046&view=rev
Author: dnaber
Date: 2013-01-17 21:36:00 +0000 (Thu, 17 Jan 2013)
Log Message:
-----------
bugfix: the uppercase sentence rule sometimes was not triggered for Russian;
also cleaned up that rule a bit
Modified Paths:
--------------
trunk/JLanguageTool/CHANGES.txt
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
trunk/JLanguageTool/src/test/java/org/languagetool/rules/UppercaseSentenceStartRuleTest.java
trunk/JLanguageTool/src/test/java/org/languagetool/server/HTTPServerTest.java
Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt 2013-01-17 20:44:49 UTC (rev 9045)
+++ trunk/JLanguageTool/CHANGES.txt 2013-01-17 21:36:00 UTC (rev 9046)
@@ -2,7 +2,7 @@
2.1-dev (release planned for 2013-03-31)
- -Breton
+ -Breton:
-fixed several incorrect suggestions thanks to added tests on corrections.
-Catalan:
@@ -31,8 +31,11 @@
the three morfologik spell checker dictionaries (German, Austrian, and
Swiss).
-Portuguese:
- -It now has over 1000 compound words taken from a huge Porto Editora
dictionary
- -Added/improved several rules
+ -it now has over 1000 compound words taken from a huge Porto Editora
dictionary
+ -added/improved several rules
+
+ -Russian:
+ -bugfix: the uppercase sentence rule sometimes was not triggered
-bugfix: suggestions for compounds parts were missing sometimes
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
2013-01-17 20:44:49 UTC (rev 9045)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
2013-01-17 21:36:00 UTC (rev 9046)
@@ -25,6 +25,7 @@
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
+import org.languagetool.tools.StringTools;
/**
* Checks that a sentence starts with an uppercase letter.
@@ -68,13 +69,11 @@
String thirdToken = null;
// ignore quote characters:
if (tokens.length >= 3
- && ("'".equals(firstToken) || "\"".equals(firstToken) || "„"
- .equals(firstToken))) {
+ && ("'".equals(firstToken) || "\"".equals(firstToken) ||
"„".equals(firstToken))) {
matchTokenPos = 2;
secondToken = tokens[matchTokenPos].getToken();
}
- final String firstDutchToken = dutchSpecialCase(firstToken, secondToken,
- tokens);
+ final String firstDutchToken = dutchSpecialCase(firstToken, secondToken,
tokens);
if (firstDutchToken != null) {
thirdToken = firstDutchToken;
matchTokenPos = 3;
@@ -87,43 +86,40 @@
checkToken = secondToken;
}
- final String lastToken = tokens[tokens.length - 1].getToken();
-
- boolean noException = false;
- //fix for lists; note - this will not always work for the last point in
OOo,
- //as OOo might serve paragraphs in any order.
- if ((language == Language.RUSSIAN || language == Language.POLISH ||
language == Language.UKRAINIAN || language == Language.BELARUSIAN)
- && (";".equals(lastParagraphString) || ";".equals(lastToken)
- || ",".equals(lastParagraphString) || ",".equals(lastToken))) {
- noException = true;
+ String lastToken = tokens[tokens.length - 1].getToken();
+ if (lastToken.matches("[ \"'„»«“]") && tokens.length >= 2) {
+ // ignore trailing whitespace or quote
+ lastToken = tokens[tokens.length - 2].getToken();
}
- //fix for comma in last paragraph; note - this will not always work for
the last point in OOo,
- //as OOo might serve paragraphs in any order.
- if ((language == Language.RUSSIAN || language == Language.ITALIAN
- || language == Language.POLISH || language == Language.GERMAN ||
language == Language.UKRAINIAN || language == Language.BELARUSIAN)
- && (",".equals(lastParagraphString))) {
- noException = true;
- }
-
-
- //fix for words in table (not sentences); note - this will not always work
for the last point in OOo,
- //as OOo might serve paragraphs in any order.
- if ((language == Language.RUSSIAN || language == Language.ENGLISH )
- && !(lastToken.matches("[.?!…;,]"))) {
- noException = true;
- }
+ boolean preventError = false;
+ // TODO: why do only *these* languages have that special case?
+ final boolean languageHasSpecialCases = language == Language.RUSSIAN ||
language == Language.POLISH
+ || language == Language.UKRAINIAN || language ==
Language.BELARUSIAN || language == Language.ENGLISH
+ || language == Language.ITALIAN || language == Language.GERMAN;
+ if (languageHasSpecialCases) {
+ //fix for lists; note - this will not always work for the last point in
OOo,
+ //as OOo might serve paragraphs in any order.
+ if (";".equals(lastParagraphString) || ";".equals(lastToken) ||
",".equals(lastParagraphString) || ",".equals(lastToken)) {
+ preventError = true;
+ }
+ //fix for words in table (not sentences); note - this will not always
work for the last point in OOo,
+ //as OOo might serve paragraphs in any order.
+ if (!lastToken.matches("[.?!…]")) {
+ preventError = true;
+ }
+ }
+
lastParagraphString = lastToken;
if (checkToken.length() > 0) {
final char firstChar = checkToken.charAt(0);
- if (Character.isLowerCase(firstChar) && (!noException)) {
+ if (!preventError && Character.isLowerCase(firstChar)) {
final RuleMatch ruleMatch = new RuleMatch(this,
tokens[matchTokenPos].getStartPos(),
tokens[matchTokenPos].getStartPos() +
tokens[matchTokenPos].getToken().length(),
messages.getString("incorrect_case"));
- ruleMatch.setSuggestedReplacement(Character.toUpperCase(firstChar)
- + checkToken.substring(1));
+
ruleMatch.setSuggestedReplacement(StringTools.uppercaseFirstChar(checkToken));
ruleMatches.add(ruleMatch);
}
}
Modified:
trunk/JLanguageTool/src/test/java/org/languagetool/rules/UppercaseSentenceStartRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/java/org/languagetool/rules/UppercaseSentenceStartRuleTest.java
2013-01-17 20:44:49 UTC (rev 9045)
+++
trunk/JLanguageTool/src/test/java/org/languagetool/rules/UppercaseSentenceStartRuleTest.java
2013-01-17 21:36:00 UTC (rev 9046)
@@ -32,6 +32,22 @@
*/
public class UppercaseSentenceStartRuleTest extends TestCase {
+ public void testNonSentences() throws IOException {
+ // In OO/LO we get text per paragraph, and list items are a paragraph.
+ // Make sure the items that don't look like a sentence generate no error.
+ JLanguageTool langTool = new JLanguageTool(Language.ENGLISH);
+
+ assertEquals(0, langTool.check("a list item").size());
+ assertEquals(0, langTool.check("a list item,").size());
+ assertEquals(0, langTool.check("with trailing whitespace, ").size());
+ assertEquals(0, langTool.check("a list item;").size());
+ assertEquals(0, langTool.check("A sentence.").size());
+ assertEquals(0, langTool.check("A sentence!").size());
+
+ assertEquals(1, langTool.check("a sentence.").size());
+ assertEquals(1, langTool.check("a sentence!").size());
+ }
+
public void testRule() throws IOException {
JLanguageTool langTool = new JLanguageTool(Language.GERMAN);
List<RuleMatch> matches;
@@ -53,13 +69,15 @@
matches = langTool.check("Sehr geehrte Frau Merkel,\nwie wir Ihnen schon
früher mitgeteilt haben...");
assertEquals(0, matches.size());
-
- matches = langTool.check("Dies ist ein Satz. und hier kommt noch einer");
- assertEquals(1, matches.size());
+ matches = langTool.check("Dies ist ein Satz. aber das hier noch nicht");
+ assertEquals(0, matches.size());
+
matches = langTool.check("Dies ist ein Satz. ätsch, noch einer mit
Umlaut.");
assertEquals(1, matches.size());
matches = langTool.check("Dies ist ein Satz. \"aber der hier auch!\"");
assertEquals(1, matches.size());
+ matches = langTool.check("Dies ist ein Satz. „aber der hier auch!“");
+ assertEquals(1, matches.size());
matches = langTool.check("\"dies ist ein Satz!\"");
assertEquals(1, matches.size());
matches = langTool.check("'dies ist ein Satz!'");
Modified:
trunk/JLanguageTool/src/test/java/org/languagetool/server/HTTPServerTest.java
===================================================================
---
trunk/JLanguageTool/src/test/java/org/languagetool/server/HTTPServerTest.java
2013-01-17 20:44:49 UTC (rev 9045)
+++
trunk/JLanguageTool/src/test/java/org/languagetool/server/HTTPServerTest.java
2013-01-17 21:36:00 UTC (rev 9046)
@@ -63,7 +63,7 @@
final String result2 = check(Language.GERMAN, "Ein kleiner test");
assertTrue("Got " + result2 + ", expected " + emptyResultPattern,
result2.matches(emptyResultPattern));
// one error:
- assertTrue(check(Language.GERMAN, "ein kleiner
test").contains("UPPERCASE_SENTENCE_START"));
+ assertTrue(check(Language.GERMAN, "ein kleiner
test.").contains("UPPERCASE_SENTENCE_START"));
// two errors:
final String result = check(Language.GERMAN, "ein kleiner test. Und wieder
Erwarten noch was: \u00f6\u00e4\u00fc\u00df.");
assertTrue(result.contains("UPPERCASE_SENTENCE_START"));
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Master Visual Studio, SharePoint, SQL, ASP.NET, C# 2012, HTML5, CSS,
MVC, Windows 8 Apps, JavaScript and much more. Keep your skills current
with LearnDevNow - 3,200 step-by-step video tutorials by Microsoft
MVPs and experts. ON SALE this month only -- learn more at:
http://p.sf.net/sfu/learnmore_122712
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits