[LanguageTool] SF.net SVN: languagetool:[7760] trunk/JLanguageTool/src
Revision: 7760 http://languagetool.svn.sourceforge.net/languagetool/?rev=7760view=rev Author: jaumeortola Date: 2012-07-27 16:59:12 + (Fri, 27 Jul 2012) Log Message: --- [ca] Java rule improved: AccentuationCheckRule Modified Paths: -- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java trunk/JLanguageTool/src/resource/ca/catalan.dict trunk/JLanguageTool/src/resource/ca/catalan_synth.dict trunk/JLanguageTool/src/resource/ca/disambiguation.xml trunk/JLanguageTool/src/rules/ca/verb_senseaccent_nom_ambaccent.txt trunk/JLanguageTool/src/test/org/languagetool/rules/ca/AccentuationCheckRuleTest.java Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java === --- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java 2012-07-27 11:43:33 UTC (rev 7759) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java 2012-07-27 16:59:12 UTC (rev 7760) @@ -58,7 +58,7 @@ private static final Pattern ARTICLE_EL_FS = Pattern.compile(la|l'|La|L'); private static final Pattern ARTICLE_EL_MP = Pattern.compile(els|Els); private static final Pattern ARTICLE_EL_FP = Pattern.compile(les|Les); -// private static final Pattern DETERMINANT = Pattern.compile(D[^R].*); + private static final Pattern DETERMINANT = Pattern.compile(D[^R].*); private static final Pattern DETERMINANT_MS = Pattern.compile(D[^R].[MC][SN].*); private static final Pattern DETERMINANT_FS = Pattern.compile(D[^R].[FC][SN].*); private static final Pattern DETERMINANT_MP = Pattern.compile(D[^R].[MC][PN].*); @@ -72,7 +72,7 @@ private static final Pattern ADJECTIU_MP = Pattern.compile(AQ.[MC][PN].*|V.P..PM|PX.MP.*); private static final Pattern ADJECTIU_FP = Pattern.compile(AQ.[FC][PN].*|V.P..PF|PX.FP.*); private static final Pattern INFINITIU = Pattern.compile(V.N.*); - private static final Pattern VERB_CONJUGAT = Pattern.compile(V.[^NGP].*); + private static final Pattern VERB_CONJUGAT = 
Pattern.compile(V.[^NGP].*|_GV_); private static final Pattern NOT_IN_PREV_TOKEN = Pattern.compile(VA.*|PP.*|P0.*|VSP.*); private static final Pattern BEFORE_ADJECTIVE_MS = Pattern.compile(SPS00|D[^R].[MC][SN].*|V.[^NGP].*|PX.*); private static final Pattern BEFORE_ADJECTIVE_FS = Pattern.compile(SPS00|D[^R].[FC][SN].*|V.[^NGP].*|PX.*); @@ -146,8 +146,8 @@ // verb without accent - noun with accent if (isRelevantWord) { - //amb renuncies -if (tokens[i-1].hasPosTag(SPS00) !matchPostagRegexp(tokens[i],INFINITIU) ) + //amb renuncies DETERMINANT +if (tokens[i-1].hasPosTag(SPS00) !matchPostagRegexp(tokens[i-1],DETERMINANT) !matchPostagRegexp(tokens[i],INFINITIU) ) { replacement = relevantWords.get(token).getToken(); } @@ -157,13 +157,23 @@ !token.equals(cantar) ) ||(matchPostagRegexp(tokens[i-1],DETERMINANT_MP) matchPostagRegexp(relevantWords.get(token),NOM_MP)) ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FS) matchPostagRegexp(relevantWords.get(token),NOM_FS) - !token.equals(venia) !token.equals(tenia) !token.equals(continua) !token.equals(genera) ) + !token.equals(venia) !token.equals(tenia) !token.equals(continua) !token.equals(genera) !token.equals(faria)) ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FP) matchPostagRegexp(relevantWords.get(token),NOM_FP)) ) ) { replacement = relevantWords.get(token).getToken(); } + //fumaré una faria (correct: fària) + else if ( i2 matchPostagRegexp(tokens[i-2],VERB_CONJUGAT) +((matchPostagRegexp(tokens[i-1],DETERMINANT_MS) matchPostagRegexp(relevantWords.get(token),NOM_MS)) + ||(matchPostagRegexp(tokens[i-1],DETERMINANT_MP) matchPostagRegexp(relevantWords.get(token),NOM_MP)) + ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FS) matchPostagRegexp(relevantWords.get(token),NOM_FS)) + ||(matchPostagRegexp(tokens[i-1],DETERMINANT_FP) matchPostagRegexp(relevantWords.get(token),NOM_FP)) ) ) + { + replacement = relevantWords.get(token).getToken(); + } //circumstancies d'un altre caire else if ( !token.equals(venia) !token.equals(venies) 
!token.equals(tenia) !token.equals(tenies) + !token.equals(faria) !token.equals(faries) !token.equals(continua) !token.equals(continues) !token.equals(cantar) mPreposicioDE.matches() !matchPostagRegexp(tokens[i-1],NOT_IN_PREV_TOKEN) (itokens.length-2) !matchPostagRegexp(tokens[i+2],INFINITIU) @@ -173,6 +183,7 @@ } //la renuncia del president.
[LanguageTool] SF.net SVN: languagetool:[7761] trunk/JLanguageTool/src/rules/zh/grammar.xml
Revision: 7761 http://languagetool.svn.sourceforge.net/languagetool/?rev=7761view=rev Author: taolin2011 Date: 2012-07-27 17:09:32 + (Fri, 27 Jul 2012) Log Message: --- add some Chinese rules Modified Paths: -- trunk/JLanguageTool/src/rules/zh/grammar.xml Modified: trunk/JLanguageTool/src/rules/zh/grammar.xml === --- trunk/JLanguageTool/src/rules/zh/grammar.xml2012-07-27 16:59:12 UTC (rev 7760) +++ trunk/JLanguageTool/src/rules/zh/grammar.xml2012-07-27 17:09:32 UTC (rev 7761) @@ -20,6 +20,1066 @@ !-- 拼写: 词语错误 -- !-- == -- category name=词语错误 + rulegroup id=LIAN1_LIAN2_JIAN3 name=练/炼/拣 + rule + pattern + marker + token regexp=yes炼|拣/token + /marker + token postag=q|n|v|nr|vg|ag regexp=yes postag_regexp=yes笔|兵|达|功|武|习 + /token + /pattern + message + 您的意思是 + suggestion练/suggestion + \2? + /message + short词语错误/short + example type=incorrect correction=练 + 我出差以前需要 + marker炼/marker + 习一下意大利语. + /example + example type=correct她指导实习护士练习注射。/example + /rule + rule + pattern + token postag=v|a|ad regexp=yes postag_regexp=yes训|老|熟|干 + /token + marker + token regexp=yes炼|拣/token + /marker + /pattern + message + 您的意思是\1 + suggestion练/suggestion + ? + /message + short词语错误/short + example type=incorrect correction=练 + 实际上,她对他的干 + marker拣/marker + 再一次觉得很值得佩服。 + /example + example type=correct他是一个以干练与干劲闻名的军官。/example + /rule + rule + pattern + marker + token regexp=yes练|拣/token + /marker + token postag=ng|n regexp=yes postag_regexp=yes丹|钢|铁|焦|乳|油|狱 + /token + /pattern + message + 您的意思是 + suggestion炼/suggestion + \2? + /message + short词语错误/short + example type=incorrect correction=炼 + 漂亮的女人是眼睛的天堂、灵魂的地狱、钱包的 + marker练/marker + 狱。 + /example + example type=correct漂亮的女人是眼睛的天堂、灵魂的地狱、钱包的炼狱。/example + /rule + rule + pattern + token冶/token + marker + token regexp=yes练|拣/token + /marker + /pattern + message + 您的意思是\1 + suggestion炼/suggestion + ? + /message + short词语错误/short + example type=incorrect correction=炼 + 将溶液倒入容器中进行冶 + marker练/marker + 。 + /example + example
[LanguageTool] SF.net SVN: languagetool:[7762] trunk/JLanguageTool/src
Revision: 7762 http://languagetool.svn.sourceforge.net/languagetool/?rev=7762view=rev Author: jaumeortola Date: 2012-07-27 18:02:52 + (Fri, 27 Jul 2012) Log Message: --- [ca] Improved rule: DALT. Modified Paths: -- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java trunk/JLanguageTool/src/rules/ca/grammar.xml Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java === --- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java 2012-07-27 17:09:32 UTC (rev 7761) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/ca/AccentuationCheckRule.java 2012-07-27 18:02:52 UTC (rev 7762) @@ -146,7 +146,7 @@ // verb without accent - noun with accent if (isRelevantWord) { - //amb renuncies DETERMINANT + //amb renuncies if (tokens[i-1].hasPosTag(SPS00) !matchPostagRegexp(tokens[i-1],DETERMINANT) !matchPostagRegexp(tokens[i],INFINITIU) ) { replacement = relevantWords.get(token).getToken(); Modified: trunk/JLanguageTool/src/rules/ca/grammar.xml === --- trunk/JLanguageTool/src/rules/ca/grammar.xml2012-07-27 17:09:32 UTC (rev 7761) +++ trunk/JLanguageTool/src/rules/ca/grammar.xml2012-07-27 18:02:52 UTC (rev 7762) @@ -2278,19 +2278,47 @@ example type=correctVa estar a punt de morir./example /rule /rulegroup -rule id=DALT name=d'alt/dalt -pattern -token regexp=yesa|fins|de/token -marker -tokend'/token -tokenalt/token -/marker -/pattern -messagePotser és: suggestiondalt/suggestion./message -shortConfusió./short -example type=incorrectVan pujar fins markerd'alt/marker./example -example type=correctVan pujar fins markerdalt/marker./example -/rule +rulegroup id=DALT name=*d'alt/dalt +rule +pattern +token regexp=yesa|fins|de/token +marker +tokend'/token +tokenalt/token +/marker +/pattern +messagePotser és: suggestiondalt/suggestion./message +shortConfusió./short +example type=incorrectVan pujar fins markerd'alt/marker./example +example type=correctVan pujar fins markerdalt/marker./example +/rule 
+rule +pattern +marker +tokend'/token +tokenalt/token +/marker +token postag=D[AID].* postag_regexp=yes/token +/pattern +messagePotser és: suggestiondalt/suggestion./message +shortConfusió./short +example type=incorrectVan pujar markerd'alt/marker la terrassa./example +example type=correctVan pujar dalt la terrassa./example +/rule +rule +pattern +marker +tokend'/token +tokenalt/token +/marker +token inflected=yesde/token +/pattern +messagePotser és: suggestiondalt/suggestion./message +shortConfusió./short +example type=incorrectVan pujar markerd'alt/marker de tot./example +example type=correctVan pujar dalt de tot./example +/rule +/rulegroup rulegroup id=DARRERA name=darrera/darrere rule pattern This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. -- Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ ___ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs
[LanguageTool] SF.net SVN: languagetool:[7765] trunk/JLanguageTool/src
Revision: 7765 http://languagetool.svn.sourceforge.net/languagetool/?rev=7765view=rev Author: jaumeortola Date: 2012-07-27 23:45:40 + (Fri, 27 Jul 2012) Log Message: --- [ca] Fixed some false alarms. Modified Paths: -- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRule.java trunk/JLanguageTool/src/resource/ca/disambiguation.xml trunk/JLanguageTool/src/rules/ca/grammar.xml Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRule.java === --- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRule.java 2012-07-27 22:18:47 UTC (rev 7764) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/ca/ComplexAdjectiveConcordanceRule.java 2012-07-27 23:45:40 UTC (rev 7765) @@ -76,7 +76,7 @@ private static final Pattern COORDINACIO = Pattern.compile(,|i|o); private static final Pattern COORDINACIO_IONI = Pattern.compile(i|o|ni); private static final Pattern KEEP_COUNT = Pattern.compile(A.*|N.*|D[NAID].*|SPS.*|R.*|V.P.*); - private static final Pattern KEEP_COUNT2 = Pattern.compile(,|i|o|ni); + private static final Pattern KEEP_COUNT2 = Pattern.compile(,|i|o|ni|\\d+); private static final Pattern PREPOSICIONS = Pattern.compile(SPS.*); private static final Pattern VERB_AUXILIAR = Pattern.compile(V[AS].*); private static final Pattern EXCEPCIONS_PARTICIPI = Pattern.compile(atès|atés|atesa|atesos|ateses|donat|donats|donada|donades); Modified: trunk/JLanguageTool/src/resource/ca/disambiguation.xml === --- trunk/JLanguageTool/src/resource/ca/disambiguation.xml 2012-07-27 22:18:47 UTC (rev 7764) +++ trunk/JLanguageTool/src/resource/ca/disambiguation.xml 2012-07-27 23:45:40 UTC (rev 7765) @@ -4712,7 +4712,7 @@ marker and token postag=A.* postag_regexp=yesexception postag=_GN_.* postag_regexp=yes /exception regexp=yes\p{Lu}+[\p{Ll}·]*/exceptionexception regexp=yes[\p{Ll}·]+(e|[aei]sses|[ae]res|era)/exception/token -token postag=V.* postag_regexp=yesexception postag=_GV_ //token 
+token postag=V.* postag_regexp=yesexception postag=_GV_|VMIP1S0 postag_regexp=yes //token /and /marker /pattern @@ -5125,6 +5125,18 @@ disambig action=filter postag=I / /rule /rulegroup +rulegroup id=interjeccio name=interjeccions +rule +pattern +token postag=SENT_START|CC postag_regexp=yes/token +marker +tokenvaja/token +/marker +token regexp=yes,|\.\.\.|\./token +/pattern +disambig action=filter postag=I / +/rule +/rulegroup rule id=PI_N name=pronom indefinit i nom pattern and Modified: trunk/JLanguageTool/src/rules/ca/grammar.xml === --- trunk/JLanguageTool/src/rules/ca/grammar.xml2012-07-27 22:18:47 UTC (rev 7764) +++ trunk/JLanguageTool/src/rules/ca/grammar.xml2012-07-27 23:45:40 UTC (rev 7765) @@ -3051,10 +3051,8 @@ /pattern messageCal dir suggestionel match no=2/match/suggestion./message shortError ortogràfic/short -example type=incorrectmarkerl'iogurt/marker -/example -example type=correctmarkerel iogurt/marker -/example +example type=incorrectmarkerl'iogurt/marker/example +example type=correctmarkerel iogurt/marker/example /rule rule pattern @@ -3063,12 +3061,9 @@ /pattern messageCal dir suggestionla match no=2/match/suggestion./message shortError ortogràfic/short -example type=incorrectmarkerl'universitat/marker -/example -example type=correctmarkerla universitat/marker -/example +example type=incorrectmarkerl'universitat/marker/example +example type=correctmarkerla universitat/marker/example example type=correctl'intel·lectual valencià/example -example type=correctla intel·lectual valenciana/example /rule rule pattern @@ -3079,12 +3074,14 @@ token postag=_GN_MS / /and /marker -token postag=_GN_FS / +token postag=_GN_FSexception postag=_GN_MS//token /pattern messageCal dir suggestionla match