[LanguageTool] SF.net SVN: languagetool:[7452] trunk/JLanguageTool/src

milek_pl Fri, 22 Jun 2012 02:06:53 -0700

Revision: 7452
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7452&view=rev
Author:   milek_pl
Date:     2012-06-22 09:06:39 +0000 (Fri, 22 Jun 2012)
Log Message:
-----------
fix a problem with the disambiguator code not following the specification 
(wd is now applicable for disambiguator action REPLACE)


Modified Paths:
--------------
    
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
    trunk/JLanguageTool/src/resource/pl/disambiguation.xml
    trunk/JLanguageTool/src/rules/pl/grammar.xml

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
   2012-06-22 07:32:09 UTC (rev 7451)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationPatternRule.java
   2012-06-22 09:06:39 UTC (rev 7452)
@@ -104,7 +104,8 @@
         && disambAction != DisambiguatorAction.UNIFY
         && disambAction != DisambiguatorAction.ADD
         && disambAction != DisambiguatorAction.REMOVE
-        && disambAction != DisambiguatorAction.IMMUNIZE) {
+        && disambAction != DisambiguatorAction.IMMUNIZE
+        && disambAction != DisambiguatorAction.REPLACE) {
       throw new NullPointerException("disambiguated POS cannot be null");
     }    
     this.disambiguatedPOS = disamb;
@@ -252,10 +253,11 @@
             + endPositionCorrection) {
           String lemma = "";
           String token = "";
-          for (int i = 0; i < newTokenReadings.length; i++) {            
+          for (int i = 0; i < newTokenReadings.length; i++) {
+            final int position = text.getOriginalPosition(firstMatchToken + 
correctedStPos
+                      + i);
             if ("".equals(newTokenReadings[i].getToken())) { //empty token 
-              token = whTokens[text.getOriginalPosition(firstMatchToken + 
correctedStPos
-                  + i)].getToken();
+              token = whTokens[position].getToken();
             } else {
               token = newTokenReadings[i].getToken();
             }
@@ -265,8 +267,7 @@
               lemma = newTokenReadings[i].getLemma();
             }
             final AnalyzedToken newTok = new AnalyzedToken(token, 
newTokenReadings[i].getPOSTag(), lemma);
-            final int position = text.getOriginalPosition(firstMatchToken + 
correctedStPos
-                    + i);
+            
             final String prevValue = whTokens[position].toString();
             final String prevAnot = 
whTokens[position].getHistoricalAnnotations();
             whTokens[position].addReading(newTok);
@@ -294,7 +295,31 @@
     case REPLACE:
     default:
       if (!filtered) {
-        if (matchElement == null) {
+          if (newTokenReadings != null && newTokenReadings.length > 0) {
+              if (newTokenReadings.length == matchingTokens - 
startPositionCorrection
+                  + endPositionCorrection) {
+                String lemma = "";
+                String token = "";
+                for (int i = 0; i < newTokenReadings.length; i++) {
+                    final int position = 
text.getOriginalPosition(firstMatchToken + correctedStPos
+                            + i);
+                  if ("".equals(newTokenReadings[i].getToken())) { //empty 
token 
+                    token = whTokens[position].getToken();
+                  } else {
+                    token = newTokenReadings[i].getToken();
+                  }
+                  if (newTokenReadings[i].getLemma() == null) { //empty lemma
+                    lemma = token;
+                  } else {
+                    lemma = newTokenReadings[i].getLemma();
+                  }                                                            
          
+                  final AnalyzedTokenReadings toReplace = new 
AnalyzedTokenReadings(
+                          new AnalyzedToken(token, 
newTokenReadings[i].getPOSTag(), lemma), 
+                                  whTokens[fromPos].getStartPos());
+                  whTokens[position] = replaceTokens(whTokens[position], 
toReplace);
+                }
+              }
+              } else if (matchElement == null) {                               
           String lemma = "";
           for (int l = 0; l < numRead; l++) {
             if (whTokens[fromPos].getAnalyzedToken(l).getPOSTag() != null
@@ -310,20 +335,8 @@
 
           final AnalyzedTokenReadings toReplace = new AnalyzedTokenReadings(
               new AnalyzedToken(whTokens[fromPos].getToken(), disambiguatedPOS,
-                  lemma), whTokens[fromPos].getStartPos());
-          final boolean isSentEnd = whTokens[fromPos].isSentEnd();
-          final boolean isParaEnd = whTokens[fromPos].isParaEnd();
-          final String prevValue = whTokens[fromPos].toString();
-          whTokens[fromPos] = toReplace;
-          if (isSentEnd) {
-            whTokens[fromPos].setSentEnd();            
-          }
-          if (isParaEnd) {
-            whTokens[fromPos].setParaEnd();
-          }
-          whTokens[fromPos].setWhitespaceBefore(spaceBefore);
-          final String prevAnot = whTokens[fromPos].getHistoricalAnnotations();
-          annotateChange(whTokens[fromPos], prevValue, prevAnot);
+                  lemma), whTokens[fromPos].getStartPos());                    
+          whTokens[fromPos] = replaceTokens(whTokens[fromPos], toReplace);     
     
         } else {
           // using the match element
           matchElement.setToken(whTokens[fromPos]);
@@ -334,9 +347,28 @@
           annotateChange(whTokens[fromPos], prevValue, prevAnot);
         }
       }
+    
     }
     return whTokens;
   }
+  
+  private AnalyzedTokenReadings replaceTokens(AnalyzedTokenReadings oldAtr, 
final AnalyzedTokenReadings newAtr) {
+      final String prevValue = oldAtr.toString();
+      final String prevAnot = oldAtr.getHistoricalAnnotations();
+      final boolean isSentEnd = oldAtr.isSentEnd();
+      final boolean isParaEnd = oldAtr.isParaEnd();
+      final boolean spaceBefore = oldAtr.isWhitespaceBefore();
+      AnalyzedTokenReadings a = newAtr;      
+      if (isSentEnd) {
+          a.setSentEnd();            
+        }
+        if (isParaEnd) {
+            a.setParaEnd();
+        }
+      a.setWhitespaceBefore(spaceBefore);
+      annotateChange(a, prevValue, prevAnot);
+      return a;
+  }
 
   private void annotateChange(AnalyzedTokenReadings atr, final String 
prevValue, String prevAnot) {      
       atr.setHistoricalAnnotations(prevAnot + "\n" +

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
    2012-06-22 07:32:09 UTC (rev 7451)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/tagging/disambiguation/rules/DisambiguationRuleLoader.java
    2012-06-22 09:06:39 UTC (rev 7452)
@@ -264,8 +264,10 @@
 
       final int matchedTokenCount = endPos - startPos;
       if (newWdList != null) {
-        if (disambigAction == DisambiguatorAction.ADD || disambigAction == 
DisambiguatorAction.REMOVE) {
-          if (newWdList.size() != matchedTokenCount) {
+        if (disambigAction == DisambiguatorAction.ADD || disambigAction == 
DisambiguatorAction.REMOVE
+                || disambigAction == DisambiguatorAction.REPLACE) {
+          if ((!newWdList.isEmpty() && disambigAction == 
DisambiguatorAction.REPLACE) 
+                  && newWdList.size() != matchedTokenCount) {
             throw new SAXException(
                 language.getName() + " rule error. The number of 
interpretations specified with wd: "
                     + newWdList.size()

Modified: trunk/JLanguageTool/src/resource/pl/disambiguation.xml
===================================================================
--- trunk/JLanguageTool/src/resource/pl/disambiguation.xml      2012-06-22 
07:32:09 UTC (rev 7451)
+++ trunk/JLanguageTool/src/resource/pl/disambiguation.xml      2012-06-22 
09:06:39 UTC (rev 7452)
@@ -1029,5 +1029,52 @@
        <disambig action="replace"><match no="2" postag_regexp="yes" 
postag="subst.*">Tel</match></disambig>
     </rule>
     
+    <rule name="n.e." id="NE_BREV">
+       <pattern>
+               <token>n</token>
+               <token>.</token>
+               <marker>
+               <token>e</token>
+               </marker>
+               <token>.</token>
+       </pattern>
+       <disambig action="add"><wd lemma="era" pos="brev:pun"/></disambig>
+    </rule>
     
+    <rule name="nn" id="NN_BREV">
+       <pattern>
+               <token>nn</token>
+               <token>.</token>
+       </pattern>
+       <disambig postag="brev:pun"/>
+    </rule>    
+    
+    <rule name="red. nauk." id="red_nauk_brev">
+       <pattern>
+       <token>red</token>
+       <token>.</token>
+       <marker>
+       <token>nauk</token>
+       </marker>
+       <token>.</token>
+       </pattern>
+       <disambig action="add"><wd lemma="naukowy" pos="brev:pun"/></disambig>
+    </rule>
+    
+    <rule name="n.p.m." id="npm">
+       <pattern>
+               <token regexp="yes">[np]</token>
+               <token>.</token>
+               <token>p</token>
+               <token>.</token>
+               <marker>
+                       <token>m</token>
+               </marker>
+               <token>.</token>
+       </pattern>
+       <disambig action="replace"><wd lemma="morza" pos="brev:pun"/></disambig>
+       <example type="untouched">Przebiegł 20 m.</example>
+       <example type="ambiguous" inputform="m[metr/brev:npun]" 
outputform="m[morza/brev:pun]">Ta góra ma wysokość 20 m n. p. 
<marker>m</marker>.</example>
+    </rule>
+    
 </rules>
\ No newline at end of file

Modified: trunk/JLanguageTool/src/rules/pl/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/pl/grammar.xml        2012-06-22 07:32:09 UTC 
(rev 7451)
+++ trunk/JLanguageTool/src/rules/pl/grammar.xml        2012-06-22 09:06:39 UTC 
(rev 7452)
@@ -2229,13 +2229,14 @@
         <rule id="KROPKA_W_NAWIASIE" name="Kropka w nawiasie">
             <pattern>
                 <token><exception>(</exception></token>
-                <token spacebefore="ignore"><exception 
regexp="yes">proc|r|itd|itp|cdn|jw|n.e|w|nn|n|br</exception><exception 
postag="brev:pun"/></token>
+                <token spacebefore="ignore"><exception 
regexp="yes">\d+</exception><exception postag="brev:pun"/></token>
                 <marker>
                     <token spacebefore="no"><exception 
postag="&lt;/ELLIPSIS&gt;"></exception>.</token>
                     <token postag="SENT_END" spacebefore="no">)</token>
                 </marker>
             </pattern>
             <message>Kropka kończąca zdanie powinna znajdować się za nawiasem 
(uwaga: dawniejsze reguły nakazywały stawiać kropkę przed nawiasem, jeśli w 
nawiasie znajdowało się całe zdanie; te reguły są dziś nieaktualne). Poprawnie: 
<suggestion>).</suggestion></message>
+            <url>http://so.pwn.pl/zasady.php?id=629865</url>
             <short>Błędne umiejscowienie kropki</short>
             <example correction=")." type="incorrect">(Całe zdanie jest w 
nawiasie<marker>.)</marker></example>
             <example type="correct">Jest na to wiele przykładów (herbata, 
mleko, woda...).</example>
@@ -2243,6 +2244,10 @@
             <example type="correct">Po zakończeniu jego kadencji (pod koniec 
maja br.) zamierzam ubiegać się o wybór do zarządu.</example>
             <example type="correct">Sienkiewicz, Henryk (red.)</example>
             <example type="correct">To duża kwota (730 tys.)</example>
+            <example type="correct">To długo trwało (zwłaszcza w latach 50.) i 
do dziś trwa.</example>
+            <example type="correct">(Gnaeus Domitius Ahenobarbus, w 28 
n.e.)</example>
+            <example type="correct">Zofia Staszczak (red. nauk.) Słownik 
etnologiczny.</example>
+            <example type="correct">Jezioro Bajkał (1637 m p.p.m.)</example>
         </rule>
         <rule id="KROPKA_W_CUDZYSLOWIE" name="Kropka w cudzysłowie">
             <pattern>

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

[LanguageTool] SF.net SVN: languagetool:[7452] trunk/JLanguageTool/src

Reply via email to