Revision: 7553
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7553&view=rev
Author:   milek_pl
Date:     2012-06-29 12:28:12 +0000 (Fri, 29 Jun 2012)
Log Message:
-----------
[pl] fix false alarms, use better tokenization in the dictionary etc.

Modified Paths:
--------------
    trunk/JLanguageTool/src/resource/pl/disambiguation.xml
    trunk/JLanguageTool/src/resource/pl/hunspell/pl_PL.dict
    trunk/JLanguageTool/src/rules/pl/grammar.xml
    trunk/JLanguageTool/src/rules/pl/replace.txt
    trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
    trunk/JLanguageTool/src/test/org/languagetool/rules/pl/MorfologikPolishSpellerRuleTest.java

Modified: trunk/JLanguageTool/src/resource/pl/disambiguation.xml
===================================================================
--- trunk/JLanguageTool/src/resource/pl/disambiguation.xml      2012-06-29 10:23:17 UTC (rev 7552)
+++ trunk/JLanguageTool/src/resource/pl/disambiguation.xml      2012-06-29 12:28:12 UTC (rev 7553)
@@ -1077,4 +1077,36 @@
        <example type="ambiguous" inputform="m[metr/brev:npun]" 
outputform="m[morza/brev:pun]">Ta góra ma wysokość 20 m n. p. 
<marker>m</marker>.</example>
     </rule>
     
+    <rule name="co jako qub" id="co_qub">
+       <pattern>
+               <marker>
+               <token>co</token>
+               </marker>
+               <unify><feature id="gender"/>
+               <feature id="case"></feature>
+               <feature id="number"></feature>
+               <token postag="adj.*:com" postag_regexp="yes"/>
+               <token postag="(subst|ger).*" postag_regexp="yes"/>
+               </unify>
+       </pattern>
+       <disambig postag="qub"/>
+       <example type="untouched">I, co ważniejsze, jest to dzisiaj 
rzadkość.</example>
+       <example type="ambiguous" 
inputform="co[co/prep:acc.gen.nom,co/subst:sg:acc:n2,co/subst:sg:gen:n2,co/subst:sg:nom:n2]"
 outputform="co[co/qub]">Przy <marker>co</marker> trudniejszych fragmentach 
robił sobie notatki.</example>
+    </rule>
+    
+    <!-- TODO: remove both from Morfologik! -->
+    <rule name="C" id="C">
+       <pattern>
+       <token>C</token>
+       </pattern>
+       <disambig postag="qub"/>
+    </rule>
+    
+    <rule name="K" id="K">
+       <pattern>
+       <token>K</token>
+       </pattern>
+       <disambig postag="qub"/>
+    </rule>
+    
 </rules>
\ No newline at end of file

Modified: trunk/JLanguageTool/src/resource/pl/hunspell/pl_PL.dict
===================================================================
(Binary files differ)

Modified: trunk/JLanguageTool/src/rules/pl/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/pl/grammar.xml        2012-06-29 10:23:17 UTC (rev 7552)
+++ trunk/JLanguageTool/src/rules/pl/grammar.xml        2012-06-29 12:28:12 UTC (rev 7553)
@@ -971,7 +971,7 @@
                         <token negate="yes">,<exception 
regexp="yes">[\p{Punct}–—„«»…]+</exception><exception postag="conj|SENT_START" 
postag_regexp="yes"></exception></token>
                         <token postag="prep:.*" postag_regexp="yes"><exception 
postag="&lt;/?ADV&gt;|&lt;/?PREP.*&gt;" 
postag_regexp="yes"></exception><exception regexp="yes">za|o</exception></token>
                     </marker>
-                    <token inflected="yes">co<exception 
postag="&lt;/?ADV&gt;|&lt;/?PREP.*&gt;" 
postag_regexp="yes"></exception><exception 
postag="co_innego|czym_innym|co_drugi" postag_regexp="yes"></exception></token>
+                    <token inflected="yes">co<exception 
postag="&lt;/?ADV&gt;|&lt;/?PREP.*&gt;" 
postag_regexp="yes"></exception><exception 
postag="co_innego|czym_innym|co_drugi|qub" 
postag_regexp="yes"></exception></token>
                     <token><exception 
regexp="yes">[\p{Punct}–\-—…]+</exception><exception 
postag="SENT_END"></exception></token>
                 </pattern>
                 <message>Prawdopodobnie brak przecinka. Powinno być: 
<suggestion><match no="1"></match>, <match 
no="2"></match></suggestion>.</message>
@@ -983,6 +983,7 @@
                 <example type="correct">Naprawdę nie masz za co 
dziękować.</example>
                 <example correction="Oto, do" type="incorrect"><marker>Oto 
do</marker> czego doprowadziły rządy ludzi niespełna rozumu.</example>
                 <example type="correct">Spotykamy się w co drugi 
piątek.</example>
+                <example type="correct">Ograniczyli ich użycie do minimum, np. 
pomagając sobie przy co trudniejszych animacjach.</example>
             </rule>            
             <rule>
                 <pattern>
@@ -12726,6 +12727,16 @@
                 <example type="correct">W tym roku Fundusz ze względu na 
zmienioną ustawę, która weszła w życie pierwszego października, robi dwa 
bilanse.</example>
             </rule>
         </rulegroup>
+        <rule id="COD_ZIEN" name="„cod zień” (co dzień)">
+               <pattern>
+                       <token>cod</token>
+                       <token>zień</token>
+               </pattern>
+               <message>Prawdopodobna literówka. Czy chodziło o <suggestion>co 
dzień</suggestion>?</message>
+               <short>Prawdopodobna literówka</short>
+               <example type="correct">Chodzę w tunikach na co dzień.</example>
+               <example type="incorrect">Chodzę w tunikach na <marker>cod 
zień</marker>.</example>
+        </rule>
     </category>
     <category name="Błędy frazeologiczne">
         <rule id="KROPLA_DZIEGCIU" name="„kropla dziegciu” (łyżka dziegciu)">

Modified: trunk/JLanguageTool/src/rules/pl/replace.txt
===================================================================
--- trunk/JLanguageTool/src/rules/pl/replace.txt        2012-06-29 10:23:17 UTC (rev 7552)
+++ trunk/JLanguageTool/src/rules/pl/replace.txt        2012-06-29 12:28:12 UTC (rev 7553)
@@ -9,4 +9,7 @@
 zawszę=zawsze
 parz=patrz
 przejcie=przejście
-slaby=słaby
\ No newline at end of file
+slaby=słaby
+Aachen=Akwizgran
+Chańcza=Hańcza
+bezsensowy=bezsensowny
\ No newline at end of file

Modified: trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java        2012-06-29 10:23:17 UTC (rev 7552)
+++ trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java        2012-06-29 12:28:12 UTC (rev 7553)
@@ -142,7 +142,7 @@
     assertEquals(6, matches.size());
     tool.setListUnknownWords(true);
     matches = tool.check("This is not a Polish text.");
-    assertEquals(4, matches.size());
+    assertEquals(3, matches.size());
     assertEquals("[Polish, This, is, text]", 
tool.getUnknownWords().toString());
     //check positions relative to sentence ends    
     matches = tool.check("To jest tekst.\nTest 1. To jest linia w której nie 
ma przecinka.");

Modified: trunk/JLanguageTool/src/test/org/languagetool/rules/pl/MorfologikPolishSpellerRuleTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/rules/pl/MorfologikPolishSpellerRuleTest.java 2012-06-29 10:23:17 UTC (rev 7552)
+++ trunk/JLanguageTool/src/test/org/languagetool/rules/pl/MorfologikPolishSpellerRuleTest.java 2012-06-29 12:28:12 UTC (rev 7553)
@@ -36,7 +36,7 @@
         assertEquals(1, matches.length);
         assertEquals(0, matches[0].getFromPos());
         assertEquals(4, matches[0].getToPos());
-        assertEquals("Zolą", matches[0].getSuggestedReplacements().get(0));
+        assertEquals("Zola", matches[0].getSuggestedReplacements().get(0));
 
         assertEquals(1, 
rule.match(langTool.getAnalyzedSentence("aõh")).length);
         assertEquals(0, rule.match(langTool.getAnalyzedSentence("a")).length);

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to