Revision: 6140
http://languagetool.svn.sourceforge.net/languagetool/?rev=6140&view=rev
Author: dnaber
Date: 2012-01-01 19:23:05 +0000 (Sun, 01 Jan 2012)
Log Message:
-----------
[de] avoid false alarms caused by strange "NIL" taggings from Morphy
Modified Paths:
--------------
trunk/JLanguageTool/src/java/org/languagetool/rules/de/CaseRule.java
trunk/JLanguageTool/src/test/org/languagetool/rules/de/CaseRuleTest.java
Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/de/CaseRule.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/de/CaseRule.java 2012-01-01 19:19:06 UTC (rev 6139)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/de/CaseRule.java 2012-01-01 19:23:05 UTC (rev 6140)
@@ -321,6 +321,7 @@
!StringTools.isAllUppercase(token) &&
!exceptions.contains(token) &&
!analyzedToken.hasReadingOfType(POSType.PROPER_NOUN) &&
+ !isNilReading(analyzedToken) &&
!analyzedToken.isSentenceEnd() &&
!isExceptionPhrase(i, tokens)) {
final String msg = "Außer am Satzanfang werden nur Nomen und Eigennamen großgeschrieben";
@@ -333,6 +334,17 @@
}
}
+ /** Morphy has about 750 words tagged: wkl="NIL" tip="SUB" - ignore these. */
+ private boolean isNilReading(AnalyzedGermanTokenReadings analyzedToken) {
+ final List<AnalyzedGermanToken> germanReadings = analyzedToken.getGermanReadings();
+ if (germanReadings.size() > 0) {
+ if ("NIL:SUB".equals(germanReadings.get(0).getPOSTag())) {
+ return true;
+ }
+ }
+ return false;
+ }
+
private boolean isExceptionPhrase(int i, AnalyzedTokenReadings[] tokens) {
// TODO: speed up?
for (String exc : myExceptionPhrases) {
Modified: trunk/JLanguageTool/src/test/org/languagetool/rules/de/CaseRuleTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/de/CaseRuleTest.java 2012-01-01 19:19:06 UTC (rev 6139)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/de/CaseRuleTest.java 2012-01-01 19:23:05 UTC (rev 6140)
@@ -51,6 +51,8 @@
assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. «Ein Zitat.»")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. »Ein Zitat.«")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence("Hier ein Satz. (Noch einer.)")).length);
+ // "NIL" reading in Morphy that used to confuse CaseRule:
+ assertEquals(0, rule.match(langTool.getAnalyzedSentence("Ein Menschenfreund.")).length);
// works only thanks to addex.txt:
assertEquals(0, rule.match(langTool.getAnalyzedSentence("Der Nachfahre.")).length);
// both can be correct:
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Ridiculously easy VDI. With Citrix VDI-in-a-Box, you don't need a complex
infrastructure or vast IT resources to deliver seamless, secure access to
virtual desktops. With this all-in-one solution, easily deploy virtual
desktops for less than the cost of PCs and save 60% on VDI infrastructure
costs. Try it free! http://p.sf.net/sfu/Citrix-VDIinabox
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs