[LanguageTool] SF.net SVN: languagetool:[7337] trunk/JLanguageTool/src

milek_pl Wed, 13 Jun 2012 11:06:15 -0700

Revision: 7337
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7337&view=rev
Author:   milek_pl
Date:     2012-06-13 18:05:52 +0000 (Wed, 13 Jun 2012)
Log Message:
-----------
[pl] fix false alarms


Modified Paths:
--------------
    trunk/JLanguageTool/src/resource/pl/disambiguation.xml
    trunk/JLanguageTool/src/rules/pl/grammar.xml

Modified: trunk/JLanguageTool/src/resource/pl/disambiguation.xml
===================================================================
--- trunk/JLanguageTool/src/resource/pl/disambiguation.xml      2012-06-13 
17:16:33 UTC (rev 7336)
+++ trunk/JLanguageTool/src/resource/pl/disambiguation.xml      2012-06-13 
18:05:52 UTC (rev 7337)
@@ -17,12 +17,24 @@
         <equivalence type="f">
             <token postag=".*[\.:]f([\.:].*)?" postag_regexp="yes"/>
         </equivalence>
-        <equivalence type="m">
-            <token postag=".*[\.:]m[1-4]?([\.:].*)?" postag_regexp="yes"/>
+        <equivalence type="m1">
+            <token postag=".*[\.:]m1([\.:].*)?" postag_regexp="yes"/>
         </equivalence>
-        <equivalence type="n">
-            <token postag=".*[\.:]n[1-2]?([\.:].*)?" postag_regexp="yes"/>
+        <equivalence type="m2">
+            <token postag=".*[\.:]m2([\.:].*)?" postag_regexp="yes"/>
         </equivalence>
+        <equivalence type="m3">
+            <token postag=".*[\.:]m3([\.:].*)?" postag_regexp="yes"/>
+        </equivalence>
+        <equivalence type="n1">
+            <token postag=".*[\.:]n1([\.:].*)?" postag_regexp="yes"/>
+        </equivalence>
+        <equivalence type="n2">
+            <token postag=".*[\.:]n2([\.:].*)?" postag_regexp="yes"/>
+        </equivalence>
+        <equivalence type="p"> <!-- one class for the tag segments p1, p2 and p3 -->
+            <token postag=".*[\.:]p[123]([\.:].*)?" postag_regexp="yes"/>
+        </equivalence>
     </unification>
     <unification feature="case">
         <equivalence type="nom">
@@ -119,7 +131,7 @@
             <marker>
                 <unify> <feature id="number"/><feature id="gender"/> <feature 
id="case"/>
                     <token>tym</token>
-                    <token postag_regexp="yes" postag="subst:sg.*"/>
+                    <token postag_regexp="yes" postag="(subst|ger):.*"/> <!-- ":.*" so that complete tags such as subst:sg:nom:m1 match; ":*" would only repeat the colon -->
                 </unify>
             </marker>
         </pattern>
@@ -152,13 +164,98 @@
         </rule>
     </rulegroup>
     
+    <rulegroup name="bez czarny" id="bez_czarny"> <!-- chooses between the subst and prep:gen readings of "bez" -->
+        <rule> <!-- "bez" directly before "czarny": keep only the subst:sg:(acc|nom):m3 readings -->
+            <pattern>
+                <marker>
+                    <token>bez</token>
+                </marker>
+                <token>czarny</token>
+            </pattern>
+            <disambig action="filter" postag="subst:sg:(acc|nom):m3"/>
+            <example type="untouched">Proszę kremówkę bez kremu.</example>
+            <example type="ambiguous" inputform="bez[bez/prep:gen,bez/subst:sg:acc:m3,bez/subst:sg:nom:m3,beza/subst:pl:gen:f]" outputform="bez[bez/subst:sg:acc:m3,bez/subst:sg:nom:m3]">Jaki piękny jest <marker>bez</marker> czarny.</example>
+        </rule>
+        <rule> <!-- "bez" after "dziki", unless the next token has a tag containing "gen" -->
+            <pattern>
+                <token>dziki</token>
+                <marker>
+                    <token>bez</token>
+                </marker>
+                <token><exception postag_regexp="yes" postag=".*gen.*"/></token>
+            </pattern>
+            <disambig action="filter" postag="subst:sg:(acc|nom):m3"/>
+            <example type="untouched">Proszę kremówkę bez kremu.</example>
+            <example type="ambiguous" inputform="bez[bez/prep:gen,bez/subst:sg:acc:m3,bez/subst:sg:nom:m3,beza/subst:pl:gen:f]" outputform="bez[bez/subst:sg:acc:m3,bez/subst:sg:nom:m3]">Jaki piękny jest dziki <marker>bez</marker>.</example>
+        </rule>
+        <rule> <!-- "bez" before a token whose tag contains "gen": tagged prep:gen -->
+            <pattern>
+                <marker>
+                    <token>bez</token>
+                </marker>
+                <token postag_regexp="yes" postag=".*gen.*"/>
+            </pattern>
+            <disambig postag="prep:gen"/>
+            <example type="ambiguous" inputform="bez[bez/prep:gen,bez/subst:sg:acc:m3,bez/subst:sg:nom:m3,beza/subst:pl:gen:f]" outputform="bez[bez/prep:gen]">Proszę kremówkę <marker>bez</marker> kremu.</example>
+            <example type="untouched">Jaki piękny jest dziki <marker>bez</marker>.</example>
+        </rule>
+    </rulegroup>
+    
+    <rule name="od to nie oda" id="OD_PREP_GEN"> <!-- "od" before a token whose tag contains "gen" is tagged prep:gen -->
+        <pattern>
+            <marker>
+                <token>od</token>
+            </marker>
+            <token postag_regexp="yes" postag=".*gen.*"/>
+        </pattern>
+        <disambig postag="prep:gen"/>
+        <example type="untouched">Od rok 2000 jestem nieruchawy.</example>
+        <example type="ambiguous" inputform="Od[Oda/subst:pl:gen:f,od/prep:gen,oda/subst:pl:gen:f]" outputform="Od[od/prep:gen]"><marker>Od</marker> roku 2000 jestem nieruchawy.</example>
+    </rule>
+    
+    <rule name="dzięki czemu" id="dzieki_prep_dat"> <!-- "dzięki" before a token whose tag contains "dat" is tagged prep:dat -->
+        <pattern>
+            <marker>
+                <token>dzięki</token>
+            </marker>
+            <token postag_regexp="yes" postag=".*dat.*"/>
+        </pattern>
+        <disambig postag="prep:dat"/>
+        <example type="untouched">Dzięki, mała!</example>
+        <example type="ambiguous" inputform="dzięki[dzięk/subst:pl:acc:m3,dzięk/subst:pl:nom:m3,dzięk/subst:pl:voc:m3,dzięka/subst:pl:acc:f,dzięka/subst:pl:nom:f,dzięka/subst:pl:voc:f,dzięka/subst:sg:gen:f,dzięki/prep:dat,dzięki/subst:pl:acc:p3,dzięki/subst:pl:nom:p3,dzięki/subst:pl:voc:p3]" outputform="dzięki[dzięki/prep:dat]">To wszystko <marker>dzięki</marker> pomocy europejskiej.</example>
+    </rule>
+    
     <rule name="MIMO to nie wołacz" id="MIMO_NIE_VOC">
        <pattern>
                <token>mimo</token>
        </pattern>
        <disambig postag="prep:gen"/>           
     </rule>
-        
+    
+    <rule name="ku to nie subst" id="KU_NIE_SUBST"> <!-- every token "ku" is tagged prep:dat -->
+        <pattern>
+            <token>ku</token>
+        </pattern>
+        <disambig postag="prep:dat"/>
+    </rule>
+    
+    <rulegroup name="do - prep lub subst" id="DO_PREP_SUBST"> <!-- "do": subst readings before "re", prep:gen otherwise -->
+        <rule> <!-- "do" followed by "re": keep only readings matching subst.* -->
+            <pattern>
+                <marker><token>do</token></marker>
+                <token>re</token>
+            </pattern>
+            <disambig action="filter" postag="subst.*"/>
+        </rule>
+        <rule> <!-- "do" followed by any token other than "re": tagged prep:gen -->
+            <pattern>
+                <marker><token>do</token></marker>
+                <token><exception>re</exception></token>
+            </pattern>
+            <disambig postag="prep:gen"/>
+        </rule>
+    </rulegroup>
+            
     <rulegroup name="prep + czasownik -> ~ czasownik" id="prep_verb">
         <!--  przyimki zawsze z tym samym przypadkiem -->
         <rule>
@@ -218,6 +315,25 @@
                </pattern>
                <disambig postag="prep:gen"/>           
        </rule>
+               
+    <rule name="W wiek" id="w_wiek"> <!-- "w" between a Roman or decimal numeral and "." is tagged brev:pun -->
+        <pattern>
+            <token regexp="yes">M*(D?C{0,3}|C[DM])(L?X{0,3}|X[LC])(V?I{0,3}|I[VX])|\d+</token>
+            <marker><token>w</token></marker>
+            <token>.</token>
+        </pattern>
+        <disambig postag="brev:pun"/>
+        <example type="ambiguous" inputform="w[w/prep:acc.loc,wiek/brev:pun]" outputform="w[wiek/brev:pun]">Dziwny był ten XX <marker>w</marker>.</example>
+        <example type="untouched">Mam to w szufladzie.</example>
+    </rule>
+    <rule name="W prep" id="w_prep"> <!-- "w" carrying the prep:acc.loc reading is reduced to that reading -->
+        <pattern>
+            <token postag="prep:acc.loc">w</token>
+        </pattern>
+        <disambig postag="prep:acc.loc"/>
+        <example type="ambiguous" inputform="w[w/prep:acc.loc,wiek/brev:pun]" outputform="w[w/prep:acc.loc]">Mam to <marker>w</marker> szufladzie.</example>
+        <example type="untouched">Dziwny XIX w.</example>
+    </rule>
 
     <rule name="Przyimek z rzeczownikiem" id="PREP_SUBST">
         <pattern>
@@ -229,7 +345,7 @@
         <disambig action="unify"></disambig>
         <example type="untouched">Tu mamy piękny przykład.</example>
         <example type="untouched">Łaciny używamy na co dzień.</example>        
-        <example type="ambiguous" 
inputform="mamy[mama/subst:pl:acc:f,mama/subst:pl:nom:f,mama/subst:pl:voc:f,mama/subst:sg:gen:f]"
 outputform="mamy[mama/subst:sg:gen:f]">Idę do <marker>mamy</marker>.</example>
+        <!-- example type="ambiguous" 
inputform="mamy[mama/subst:pl:acc:f,mama/subst:pl:nom:f,mama/subst:pl:voc:f,mama/subst:sg:gen:f]"
 outputform="mamy[mama/subst:sg:gen:f]">Idę do <marker>mamy</marker>.</example> 
 -->
     </rule>
 
 
@@ -895,4 +1011,15 @@
        <disambig postag="interj"></disambig>
     </rule>
     
+    <rule name="Tel Awiw" id="TEL_AWIW"> <!-- NOTE(review): appears to give "Tel" the subst.* tags of the following "Awiw" form; confirm -->
+        <pattern case_sensitive="yes">
+            <marker>
+                <token>Tel</token>
+            </marker>
+            <token inflected="yes">Awiw</token>
+        </pattern>
+        <disambig action="replace"><match no="2" postag_regexp="yes" postag="subst.*">Tel</match></disambig>
+    </rule>
+    
+    
 </rules>
\ No newline at end of file

Modified: trunk/JLanguageTool/src/rules/pl/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/pl/grammar.xml        2012-06-13 17:16:33 UTC 
(rev 7336)
+++ trunk/JLanguageTool/src/rules/pl/grammar.xml        2012-06-13 18:05:52 UTC 
(rev 7337)
@@ -3659,7 +3659,7 @@
                 <pattern>
                     <token postag="prep:gen"><exception 
regexp="yes">doń|zamiast</exception></token>
                     <marker>
-                        <token postag="subst:.*" 
postag_regexp="yes"><exception postag=".*gen.*|.*irreg.*|conj|adv.*" 
postag_regexp="yes"></exception><exception 
regexp="yes">.*um</exception><exception postag="&lt;/?ADV&gt;" 
postag_regexp="yes"></exception></token>
+                        <token postag="subst:.*" 
postag_regexp="yes"><exception postag=".*gen.*|.*irreg.*|conj|adv.*" 
postag_regexp="yes"></exception><exception 
regexp="yes">.*um|minus|plus</exception><exception postag="&lt;/?ADV&gt;" 
postag_regexp="yes"></exception></token>
                     </marker>
                 </pattern>
                 <message>Przyimek „\1” jest używany z dopełniaczem.</message>
@@ -3669,6 +3669,9 @@
                 <example type="incorrect">Język jest dla 
<marker>filozofem</marker> tylko topornym narzędziem.</example>
                 <example type="correct">Tu jest obok chałupa.</example>
                 <example type="incorrect">Przyszedł wreszcie sam do 
<marker>kierowniczka</marker>.</example>
+                <example type="correct">Dziki bez czarny jest to krzew wysoki 
trzy do sześciu metrów.</example>
+                <example type="correct">To dyskusje bez reszty akademickie, by 
nie rzec bezprzedmiotowe.</example>
+                <example type="correct">Niezależnie od pomocy państwowej polska klasa robotnicza miewa się dobrze.</example>
             </rule>
             <rule>
                 <pattern>
@@ -3681,10 +3684,11 @@
                 <short>Przyimek wymaga celownika</short>
                 <example type="correct">Dzięki tym wysiłkom wszystko się 
udało.</example>
                 <example type="incorrect">Dzięki <marker>wysiłki</marker> 
wszystko się udało.</example>
+                <example type="correct">Akademia ku czci pięćdziesiątej 
rocznicy</example>
             </rule>
             <rule>
                 <pattern>
-                    <token postag="prep:nom"></token>
+                    <token 
postag="prep:nom"><exception>niby</exception></token>
                     <marker>
                         <token postag="subst:.*" 
postag_regexp="yes"><exception postag=".*nom.*|.*irreg.*|conj|adv.*" 
postag_regexp="yes"></exception><exception 
regexp="yes">.*um</exception><exception 
postag="&lt;/ADV&gt;"></exception></token>
                     </marker>
@@ -3693,6 +3697,7 @@
                 <short>Przyimek wymaga mianownika</short>
                 <example type="correct">Czekamy na pojedynek premier kontra 
wicepremier.</example>
                 <example type="incorrect">Czekamy na pojedynek premier kontra 
<marker>wicepremierowi</marker>.</example>
+                <example type="correct">Niby mieszka za nami i nie 
mieszka.</example>
             </rule>
             <rule>
                 <pattern>
@@ -6818,18 +6823,19 @@
             <rule>
                 <pattern>
                     <token>z</token>
-                    <token 
regexp="yes">s[wftśsrłndwpgklcbmżń].+|[zśźż][wftśsrłndwpgklzcbmżń].+|[rs]z[wftśsrłndwpgklzcbmżń].+|[wf][wftśwpgzcbmżń].+|wk[^ł].+|f[dsk].+|wd[^zię].+|ws[^ze].+|Lwowa|mną|mnie|snu<exception
 regexp="yes">[\&quot;„]</exception><exception 
postag="UNKNOWN"></exception></token>
+                    <token 
regexp="yes">s[wftśsrłndwpgklcbmżń].+|[zśźż][wftśsrłndwpgklzcbmżń].+|[rs]z[wftśsrłndwpgklzcbmżń].+|[wf][wftśwpgzcbmżń].+|wk[^ł].+|f[dsk].+|wd[^zię].+|ws[^ze].+|Lwowa|mną|mnie|snu<exception
 regexp="yes">[\&quot;„]|ZSRR</exception><exception postag="UNKNOWN"/></token>  
                  
                 </pattern>
                 <message>Prawdopodobnie ten przyimek należy zapisać tak: 
<suggestion>\1e \2</suggestion>.</message>
                 <short>Błąd fonetyczny</short>
                 <example type="correct">Ludwik ma kochankę ze Świecia</example>
+                <example type="correct">To przedni wyrób z ZSRR.</example>
                 <example correction="ze Świecia" type="incorrect">Ludwik ma 
kochankę <marker>z Świecia</marker>.</example>
             </rule>
             <rule>
                 <pattern>
                     <token>z</token>
                     <token regexp="yes">[\&quot;„]</token>
-                    <token 
regexp="yes">s[wftśsrłndwpgklcbmżń].+|[zśźż][wftśsrłndwpgklzcbmżń].+|[rs]z[wftśsrłndwpgklzcbmżń].+|[wf][wftśwpgzcbmżń].+|wk[^ł].+|f[dsk].+|wd[^zię].+|ws[^ze].+|Lwowa|mną|mnie|snu<exception
 regexp="yes">[\&quot;„]</exception><exception 
postag="UNKNOWN"></exception></token>
+                    <token 
regexp="yes">s[wftśsrłndwpgklcbmżń].+|[zśźż][wftśsrłndwpgklzcbmżń].+|[rs]z[wftśsrłndwpgklzcbmżń].+|[wf][wftśwpgzcbmżń].+|wk[^ł].+|f[dsk].+|wd[^zię].+|ws[^ze].+|Lwowa|mną|mnie|snu<exception
 regexp="yes">[\&quot;„]|ZSRR</exception><exception postag="UNKNOWN"/></token>
                 </pattern>
                 <message>Prawdopodobnie ten przyimek należy zapisać tak: 
<suggestion>ze \2\3</suggestion>.</message>
                 <short>Błąd fonetyczny</short>
@@ -6841,7 +6847,7 @@
             <rule>
                 <pattern>
                     <token>ze</token>
-                    <token negate="yes" 
regexp="yes">[wszśźżWSZŚŹŻ].+|[SRrr]z[wftśsrłndwpgklzcbmżń].+|trz.+|dw.+|Lwowa|mną|mnie|snu|łzami|czter.+<exception
 regexp="yes">[\&quot;„]</exception><exception 
postag="UNKNOWN"></exception></token>
+                    <token negate="yes" 
regexp="yes">[wszśźżWSZŚŹŻ].+|[SRrr]z[wftśsrłndwpgklzcbmżń].+|trz.+|dw.+|Lwowa|mną|mnie|snu|łzami|czter.+|ZSRR<exception
 regexp="yes">[\&quot;„]</exception><exception 
postag="UNKNOWN"></exception></token>
                 </pattern>
                 <message>Prawdopodobnie ten przyimek należy zapisać tak: 
<suggestion>z \2</suggestion>.</message>
                 <short>Błąd fonetyczny</short>
@@ -7543,9 +7549,7 @@
                 <token><exception postag="SENT_START"></exception></token>
                 <marker>
                     <and>
-
-                        <token inflected="yes" 
regexp="yes">(?iu)jezuita|benedyktyn|dominikanin|franciszkanin|cysters|albertyn|bazylianin|kameduła|karmelita|marianin|michalita|minimita|norbertanin|pasjonista|paulin|pijar|redemptorysta|salezjanin|bonifrater|trapista|werbista|dolorysta|barnabita|bartolomita|kamilianin|salwatorianin</token>
-
+                        <token inflected="yes" 
regexp="yes">(?iu)jezuita|benedyktyn|dominikanin|franciszkanin|cysters|albertyn|bazylianin|kameduła|karmelita|marianin|michalita|minimita|norbertanin|pasjonista|paulin|pijar|redemptorysta|salezjanin|bonifrater|trapista|werbista|dolorysta|barnabita|bartolomita|kamilianin|salwatorianin<exception
 regexp="yes">Marian|Dominikany</exception></token>
                         <token regexp="yes">[A-Z][\p{Ll}]+</token>
                     </and>
                 </marker>
@@ -7554,6 +7558,7 @@
             <short>Błędna pisownia wielką literą</short>
             <example correction="redemptoryści" type="incorrect">W Toruniu 
ogromne interesy prowadzą <marker>Redemptoryści</marker>.</example>
             <example type="correct">W Toruniu potęgą są jezuici.</example>
+            <example type="correct">Po zabójstwie Marian Płoński zaalarmował 
milicję.</example>
         </rule>
         <rule id="JEZYK_POLSKI" name="Pisownia nazw języków wielką literą">
             <pattern case_sensitive="yes">

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

[LanguageTool] SF.net SVN: languagetool:[7337] trunk/JLanguageTool/src

Reply via email to