Revision: 7553
http://languagetool.svn.sourceforge.net/languagetool/?rev=7553&view=rev
Author: milek_pl
Date: 2012-06-29 12:28:12 +0000 (Fri, 29 Jun 2012)
Log Message:
-----------
[pl] fix false alarms, use better tokenization in the dictionary etc.
Modified Paths:
--------------
trunk/JLanguageTool/src/resource/pl/disambiguation.xml
trunk/JLanguageTool/src/resource/pl/hunspell/pl_PL.dict
trunk/JLanguageTool/src/rules/pl/grammar.xml
trunk/JLanguageTool/src/rules/pl/replace.txt
trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
trunk/JLanguageTool/src/test/org/languagetool/rules/pl/MorfologikPolishSpellerRuleTest.java
Modified: trunk/JLanguageTool/src/resource/pl/disambiguation.xml
===================================================================
--- trunk/JLanguageTool/src/resource/pl/disambiguation.xml 2012-06-29
10:23:17 UTC (rev 7552)
+++ trunk/JLanguageTool/src/resource/pl/disambiguation.xml 2012-06-29
12:28:12 UTC (rev 7553)
@@ -1077,4 +1077,36 @@
<example type="ambiguous" inputform="m[metr/brev:npun]"
outputform="m[morza/brev:pun]">Ta góra ma wysokość 20 m n. p.
<marker>m</marker>.</example>
</rule>
+ <rule name="co jako qub" id="co_qub">
+ <pattern>
+ <marker>
+ <token>co</token>
+ </marker>
+ <unify><feature id="gender"/>
+ <feature id="case"></feature>
+ <feature id="number"></feature>
+ <token postag="adj.*:com" postag_regexp="yes"/>
+ <token postag="(subst|ger).*" postag_regexp="yes"/>
+ </unify>
+ </pattern>
+ <disambig postag="qub"/>
+ <example type="untouched">I, co ważniejsze, jest to dzisiaj
rzadkość.</example>
+ <example type="ambiguous"
inputform="co[co/prep:acc.gen.nom,co/subst:sg:acc:n2,co/subst:sg:gen:n2,co/subst:sg:nom:n2]"
outputform="co[co/qub]">Przy <marker>co</marker> trudniejszych fragmentach
robił sobie notatki.</example>
+ </rule>
+
+ <!-- TODO: remove both tokens matched below ("C" and "K") from Morfologik! -->
+ <rule name="C" id="C">
+ <pattern>
+ <token>C</token>
+ </pattern>
+ <disambig postag="qub"/>
+ </rule>
+
+ <rule name="K" id="K">
+ <pattern>
+ <token>K</token>
+ </pattern>
+ <disambig postag="qub"/>
+ </rule>
+
</rules>
\ No newline at end of file
Modified: trunk/JLanguageTool/src/resource/pl/hunspell/pl_PL.dict
===================================================================
(Binary files differ)
Modified: trunk/JLanguageTool/src/rules/pl/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/pl/grammar.xml 2012-06-29 10:23:17 UTC
(rev 7552)
+++ trunk/JLanguageTool/src/rules/pl/grammar.xml 2012-06-29 12:28:12 UTC
(rev 7553)
@@ -971,7 +971,7 @@
<token negate="yes">,<exception
regexp="yes">[\p{Punct}–—„«»…]+</exception><exception postag="conj|SENT_START"
postag_regexp="yes"></exception></token>
<token postag="prep:.*" postag_regexp="yes"><exception
postag="</?ADV>|</?PREP.*>"
postag_regexp="yes"></exception><exception regexp="yes">za|o</exception></token>
</marker>
- <token inflected="yes">co<exception
postag="</?ADV>|</?PREP.*>"
postag_regexp="yes"></exception><exception
postag="co_innego|czym_innym|co_drugi" postag_regexp="yes"></exception></token>
+ <token inflected="yes">co<exception
postag="</?ADV>|</?PREP.*>"
postag_regexp="yes"></exception><exception
postag="co_innego|czym_innym|co_drugi|qub"
postag_regexp="yes"></exception></token>
<token><exception
regexp="yes">[\p{Punct}–\-—…]+</exception><exception
postag="SENT_END"></exception></token>
</pattern>
<message>Prawdopodobnie brak przecinka. Powinno być:
<suggestion><match no="1"></match>, <match
no="2"></match></suggestion>.</message>
@@ -983,6 +983,7 @@
<example type="correct">Naprawdę nie masz za co
dziękować.</example>
<example correction="Oto, do" type="incorrect"><marker>Oto
do</marker> czego doprowadziły rządy ludzi niespełna rozumu.</example>
<example type="correct">Spotykamy się w co drugi
piątek.</example>
+ <example type="correct">Ograniczyli ich użycie do minimum, np.
pomagając sobie przy co trudniejszych animacjach.</example>
</rule>
<rule>
<pattern>
@@ -12726,6 +12727,16 @@
<example type="correct">W tym roku Fundusz ze względu na
zmienioną ustawę, która weszła w życie pierwszego października, robi dwa
bilanse.</example>
</rule>
</rulegroup>
+ <rule id="COD_ZIEN" name="„cod zień” (co dzień)">
+ <pattern>
+ <token>cod</token>
+ <token>zień</token>
+ </pattern>
+ <message>Prawdopodobna literówka. Czy chodziło o <suggestion>co
dzień</suggestion>?</message>
+ <short>Prawdopodobna literówka</short>
+ <example type="correct">Chodzę w tunikach na co dzień.</example>
+ <example type="incorrect">Chodzę w tunikach na <marker>cod
zień</marker>.</example>
+ </rule>
</category>
<category name="Błędy frazeologiczne">
<rule id="KROPLA_DZIEGCIU" name="„kropla dziegciu” (łyżka dziegciu)">
Modified: trunk/JLanguageTool/src/rules/pl/replace.txt
===================================================================
--- trunk/JLanguageTool/src/rules/pl/replace.txt 2012-06-29 10:23:17 UTC
(rev 7552)
+++ trunk/JLanguageTool/src/rules/pl/replace.txt 2012-06-29 12:28:12 UTC
(rev 7553)
@@ -9,4 +9,7 @@
zawszę=zawsze
parz=patrz
przejcie=przejście
-slaby=słaby
\ No newline at end of file
+slaby=słaby
+Aachen=Akwizgran
+Chańcza=Hańcza
+bezsensowy=bezsensowny
\ No newline at end of file
Modified: trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
2012-06-29 10:23:17 UTC (rev 7552)
+++ trunk/JLanguageTool/src/test/org/languagetool/JLanguageToolTest.java
2012-06-29 12:28:12 UTC (rev 7553)
@@ -142,7 +142,7 @@
assertEquals(6, matches.size());
tool.setListUnknownWords(true);
matches = tool.check("This is not a Polish text.");
- assertEquals(4, matches.size());
+ assertEquals(3, matches.size());
assertEquals("[Polish, This, is, text]",
tool.getUnknownWords().toString());
//check positions relative to sentence ends
matches = tool.check("To jest tekst.\nTest 1. To jest linia w której nie
ma przecinka.");
Modified:
trunk/JLanguageTool/src/test/org/languagetool/rules/pl/MorfologikPolishSpellerRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/rules/pl/MorfologikPolishSpellerRuleTest.java
2012-06-29 10:23:17 UTC (rev 7552)
+++
trunk/JLanguageTool/src/test/org/languagetool/rules/pl/MorfologikPolishSpellerRuleTest.java
2012-06-29 12:28:12 UTC (rev 7553)
@@ -36,7 +36,7 @@
assertEquals(1, matches.length);
assertEquals(0, matches[0].getFromPos());
assertEquals(4, matches[0].getToPos());
- assertEquals("Zolą", matches[0].getSuggestedReplacements().get(0));
+ assertEquals("Zola", matches[0].getSuggestedReplacements().get(0));
assertEquals(1,
rule.match(langTool.getAnalyzedSentence("aõh")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence("a")).length);
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs