Revision: 7452
http://languagetool.svn.sourceforge.net/languagetool/?rev=7452&view=rev
Author: milek_pl
Date: 2012-06-22 09:06:39 +0000 (Fri, 22 Jun 2012)
Log Message:
-----------
fix a problem with the disambiguator code not conforming to the specification
(wd is now applicable to the disambiguator action REPLACE)
Modified Paths:
--------------
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
trunk/JLanguageTool/src/resource/pl/disambiguation.xml
trunk/JLanguageTool/src/rules/pl/grammar.xml
Modified:
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
2012-06-22 07:32:09 UTC (rev 7451)
+++
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
2012-06-22 09:06:39 UTC (rev 7452)
@@ -104,7 +104,8 @@
&& disambAction != DisambiguatorAction.UNIFY
&& disambAction != DisambiguatorAction.ADD
&& disambAction != DisambiguatorAction.REMOVE
- && disambAction != DisambiguatorAction.IMMUNIZE) {
+ && disambAction != DisambiguatorAction.IMMUNIZE
+ && disambAction != DisambiguatorAction.REPLACE) {
throw new NullPointerException("disambiguated POS cannot be null");
}
this.disambiguatedPOS = disamb;
@@ -252,10 +253,11 @@
+ endPositionCorrection) {
String lemma = "";
String token = "";
- for (int i = 0; i < newTokenReadings.length; i++) {
+ for (int i = 0; i < newTokenReadings.length; i++) {
+ final int position = text.getOriginalPosition(firstMatchToken +
correctedStPos
+ + i);
if ("".equals(newTokenReadings[i].getToken())) { //empty token
- token = whTokens[text.getOriginalPosition(firstMatchToken +
correctedStPos
- + i)].getToken();
+ token = whTokens[position].getToken();
} else {
token = newTokenReadings[i].getToken();
}
@@ -265,8 +267,7 @@
lemma = newTokenReadings[i].getLemma();
}
final AnalyzedToken newTok = new AnalyzedToken(token,
newTokenReadings[i].getPOSTag(), lemma);
- final int position = text.getOriginalPosition(firstMatchToken +
correctedStPos
- + i);
+
final String prevValue = whTokens[position].toString();
final String prevAnot =
whTokens[position].getHistoricalAnnotations();
whTokens[position].addReading(newTok);
@@ -294,7 +295,31 @@
case REPLACE:
default:
if (!filtered) {
- if (matchElement == null) {
+ if (newTokenReadings != null && newTokenReadings.length > 0) {
+ if (newTokenReadings.length == matchingTokens -
startPositionCorrection
+ + endPositionCorrection) {
+ String lemma = "";
+ String token = "";
+ for (int i = 0; i < newTokenReadings.length; i++) {
+ final int position =
text.getOriginalPosition(firstMatchToken + correctedStPos
+ + i);
+ if ("".equals(newTokenReadings[i].getToken())) { //empty
token
+ token = whTokens[position].getToken();
+ } else {
+ token = newTokenReadings[i].getToken();
+ }
+ if (newTokenReadings[i].getLemma() == null) { //empty lemma
+ lemma = token;
+ } else {
+ lemma = newTokenReadings[i].getLemma();
+ }
+ final AnalyzedTokenReadings toReplace = new
AnalyzedTokenReadings(
+ new AnalyzedToken(token,
newTokenReadings[i].getPOSTag(), lemma),
+ whTokens[fromPos].getStartPos());
+ whTokens[position] = replaceTokens(whTokens[position],
toReplace);
+ }
+ }
+ } else if (matchElement == null) {
String lemma = "";
for (int l = 0; l < numRead; l++) {
if (whTokens[fromPos].getAnalyzedToken(l).getPOSTag() != null
@@ -310,20 +335,8 @@
final AnalyzedTokenReadings toReplace = new AnalyzedTokenReadings(
new AnalyzedToken(whTokens[fromPos].getToken(), disambiguatedPOS,
- lemma), whTokens[fromPos].getStartPos());
- final boolean isSentEnd = whTokens[fromPos].isSentEnd();
- final boolean isParaEnd = whTokens[fromPos].isParaEnd();
- final String prevValue = whTokens[fromPos].toString();
- whTokens[fromPos] = toReplace;
- if (isSentEnd) {
- whTokens[fromPos].setSentEnd();
- }
- if (isParaEnd) {
- whTokens[fromPos].setParaEnd();
- }
- whTokens[fromPos].setWhitespaceBefore(spaceBefore);
- final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
- annotateChange(whTokens[fromPos], prevValue, prevAnot);
+ lemma), whTokens[fromPos].getStartPos());
+ whTokens[fromPos] = replaceTokens(whTokens[fromPos], toReplace);
} else {
// using the match element
matchElement.setToken(whTokens[fromPos]);
@@ -334,9 +347,28 @@
annotateChange(whTokens[fromPos], prevValue, prevAnot);
}
}
+
}
return whTokens;
}
+
+ private AnalyzedTokenReadings replaceTokens(AnalyzedTokenReadings oldAtr,
final AnalyzedTokenReadings newAtr) {
+ final String prevValue = oldAtr.toString();
+ final String prevAnot = oldAtr.getHistoricalAnnotations();
+ final boolean isSentEnd = oldAtr.isSentEnd();
+ final boolean isParaEnd = oldAtr.isParaEnd();
+ final boolean spaceBefore = oldAtr.isWhitespaceBefore();
+ AnalyzedTokenReadings a = newAtr;
+ if (isSentEnd) {
+ a.setSentEnd();
+ }
+ if (isParaEnd) {
+ a.setParaEnd();
+ }
+ a.setWhitespaceBefore(spaceBefore);
+ annotateChange(a, prevValue, prevAnot);
+ return a;
+ }
private void annotateChange(AnalyzedTokenReadings atr, final String
prevValue, String prevAnot) {
atr.setHistoricalAnnotations(prevAnot + "\n" +
Modified:
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
2012-06-22 07:32:09 UTC (rev 7451)
+++
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
2012-06-22 09:06:39 UTC (rev 7452)
@@ -264,8 +264,10 @@
final int matchedTokenCount = endPos - startPos;
if (newWdList != null) {
- if (disambigAction == DisambiguatorAction.ADD || disambigAction ==
DisambiguatorAction.REMOVE) {
- if (newWdList.size() != matchedTokenCount) {
+ if (disambigAction == DisambiguatorAction.ADD || disambigAction ==
DisambiguatorAction.REMOVE
+ || disambigAction == DisambiguatorAction.REPLACE) {
+ if ((!newWdList.isEmpty() && disambigAction ==
DisambiguatorAction.REPLACE)
+ && newWdList.size() != matchedTokenCount) {
throw new SAXException(
language.getName() + " rule error. The number of
interpretations specified with wd: "
+ newWdList.size()
Modified: trunk/JLanguageTool/src/resource/pl/disambiguation.xml
===================================================================
--- trunk/JLanguageTool/src/resource/pl/disambiguation.xml 2012-06-22
07:32:09 UTC (rev 7451)
+++ trunk/JLanguageTool/src/resource/pl/disambiguation.xml 2012-06-22
09:06:39 UTC (rev 7452)
@@ -1029,5 +1029,52 @@
<disambig action="replace"><match no="2" postag_regexp="yes"
postag="subst.*">Tel</match></disambig>
</rule>
+ <rule name="n.e." id="NE_BREV">
+ <pattern>
+ <token>n</token>
+ <token>.</token>
+ <marker>
+ <token>e</token>
+ </marker>
+ <token>.</token>
+ </pattern>
+ <disambig action="add"><wd lemma="era" pos="brev:pun"/></disambig>
+ </rule>
+ <rule name="nn" id="NN_BREV">
+ <pattern>
+ <token>nn</token>
+ <token>.</token>
+ </pattern>
+ <disambig postag="brev:pun"/>
+ </rule>
+
+ <rule name="red. nauk." id="red_nauk_brev">
+ <pattern>
+ <token>red</token>
+ <token>.</token>
+ <marker>
+ <token>nauk</token>
+ </marker>
+ <token>.</token>
+ </pattern>
+ <disambig action="add"><wd lemma="naukowy" pos="brev:pun"/></disambig>
+ </rule>
+
+ <rule name="n.p.m." id="npm">
+ <pattern>
+ <token regexp="yes">[np]</token>
+ <token>.</token>
+ <token>p</token>
+ <token>.</token>
+ <marker>
+ <token>m</token>
+ </marker>
+ <token>.</token>
+ </pattern>
+ <disambig action="replace"><wd lemma="morza" pos="brev:pun"/></disambig>
+ <example type="untouched">Przebiegł 20 m.</example>
+ <example type="ambiguous" inputform="m[metr/brev:npun]"
outputform="m[morza/brev:pun]">Ta góra ma wysokość 20 m n. p.
<marker>m</marker>.</example>
+ </rule>
+
</rules>
\ No newline at end of file
Modified: trunk/JLanguageTool/src/rules/pl/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/pl/grammar.xml 2012-06-22 07:32:09 UTC
(rev 7451)
+++ trunk/JLanguageTool/src/rules/pl/grammar.xml 2012-06-22 09:06:39 UTC
(rev 7452)
@@ -2229,13 +2229,14 @@
<rule id="KROPKA_W_NAWIASIE" name="Kropka w nawiasie">
<pattern>
<token><exception>(</exception></token>
- <token spacebefore="ignore"><exception
regexp="yes">proc|r|itd|itp|cdn|jw|n.e|w|nn|n|br</exception><exception
postag="brev:pun"/></token>
+ <token spacebefore="ignore"><exception
regexp="yes">\d+</exception><exception postag="brev:pun"/></token>
<marker>
<token spacebefore="no"><exception
postag="</ELLIPSIS>"></exception>.</token>
<token postag="SENT_END" spacebefore="no">)</token>
</marker>
</pattern>
<message>Kropka kończąca zdanie powinna znajdować się za nawiasem
(uwaga: dawniejsze reguły nakazywały stawiać kropkę przed nawiasem, jeśli w
nawiasie znajdowało się całe zdanie; te reguły są dziś nieaktualne). Poprawnie:
<suggestion>).</suggestion></message>
+ <url>http://so.pwn.pl/zasady.php?id=629865</url>
<short>Błędne umiejscowienie kropki</short>
<example correction=")." type="incorrect">(Całe zdanie jest w
nawiasie<marker>.)</marker></example>
<example type="correct">Jest na to wiele przykładów (herbata,
mleko, woda...).</example>
@@ -2243,6 +2244,10 @@
<example type="correct">Po zakończeniu jego kadencji (pod koniec
maja br.) zamierzam ubiegać się o wybór do zarządu.</example>
<example type="correct">Sienkiewicz, Henryk (red.)</example>
<example type="correct">To duża kwota (730 tys.)</example>
+ <example type="correct">To długo trwało (zwłaszcza w latach 50.) i
do dziś trwa.</example>
+ <example type="correct">(Gnaeus Domitius Ahenobarbus, w 28
n.e.)</example>
+ <example type="correct">Zofia Staszczak (red. nauk.) Słownik
etnologiczny.</example>
+ <example type="correct">Jezioro Bajkał (1637 m p.p.m.)</example>
</rule>
<rule id="KROPKA_W_CUDZYSLOWIE" name="Kropka w cudzysłowie">
<pattern>
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs