Sure! It is going to need translation of some strings.
Paolo
On 05/apr/2013, at 21:14, Martin Srebotnjak <[email protected]> wrote:
> Please enable this rule for Slovenian as well.
>
> Lp, m.
>
>
> 2013/4/5 Paolo Bianchini <[email protected]>
>> OK so I have a patch that adds another Java rule and uses it for the Italian
>> language module.
>>
>> I tested the rules against Wikipedia for Italian and got no false alarms;
>> the same goes for several other languages such as Spanish and German.
>>
>> English has a couple of false alarms in bibliography entries.
>>
>> Let me know what you think.
>>
>> Ciao
>>
>> Paolo
>>
>> ### Eclipse Workspace Patch 1.0
>> #P languagetool
>> Index:
>> languagetool-core/src/main/java/org/languagetool/rules/WhitespaceBeforePunctuationRule.java
>> ===================================================================
>> ---
>> languagetool-core/src/main/java/org/languagetool/rules/WhitespaceBeforePunctuationRule.java
>> (revision 0)
>> +++
>> languagetool-core/src/main/java/org/languagetool/rules/WhitespaceBeforePunctuationRule.java
>> (revision 0)
>> @@ -0,0 +1,157 @@
>> +/* LanguageTool, a natural language style checker
>> + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
>> + *
>> + * This library is free software; you can redistribute it and/or
>> + * modify it under the terms of the GNU Lesser General Public
>> + * License as published by the Free Software Foundation; either
>> + * version 2.1 of the License, or (at your option) any later version.
>> + *
>> + * This library is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
>> + * Lesser General Public License for more details.
>> + *
>> + * You should have received a copy of the GNU Lesser General Public
>> + * License along with this library; if not, write to the Free Software
>> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
>> + * USA
>> + */
>> +package org.languagetool.rules;
>> +
>> +import java.util.ArrayList;
>> +import java.util.List;
>> +import java.util.ResourceBundle;
>> +
>> +import org.languagetool.AnalyzedSentence;
>> +import org.languagetool.AnalyzedTokenReadings;
>> +import org.languagetool.tools.StringTools;
>> +
>> +/**
>> + * A rule that matches several punctuation signs such as : ; and % preceded
>> by whitespace.
>> + *
>> + * BUG ID 3607406: no space before semicolon
>> + *
>> + * @author Paolo Bianchini
>> + */
>> +
>> +public class WhitespaceBeforePunctuationRule extends Rule {
>> +
>> + public WhitespaceBeforePunctuationRule(final ResourceBundle messages) {
>> + super(messages);
>> + super.setCategory(new Category(messages.getString("category_misc")));
>> + setLocQualityIssueType("typographical");
>> + }
>> +
>> + @Override
>> + public final String getId() {
>> + return "WHITESPACE_PUNCTUATION";
>> + }
>> +
>> + @Override
>> + public final String getDescription() {
>> + return messages.getString("desc_whitespace_before_punctuation");
>> + }
>> +
>> + @Override
>> + public final RuleMatch[] match(final AnalyzedSentence text) {
>> + final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
>> + final AnalyzedTokenReadings[] tokens = text.getTokens();
>> + String prevToken = "";
>> + String prevPrevToken = "";
>> + boolean prevWhite = false;
>> + int prevLen = 0;
>> + for (int i = 0; i < tokens.length; i++) {
>> + final String token = tokens[i].getToken();
>> + final boolean isWhitespace = tokens[i].isWhitespace() ||
>> StringTools.isNonBreakingWhitespace(token)
>> + || tokens[i].isFieldCode();
>> + String msg = null;
>> + int fixLen = 0;
>> + String suggestionText = null;
>> + if (prevWhite) {
>> + if (token.equals(":")) {
>> + msg = messages.getString("no_space_before_colon");
>> + suggestionText = ":";
>> + fixLen = 1;
>> + // exception case for figures such as " : 0"
>> + if (i + 2 < tokens.length
>> + && tokens[i + 1].isWhitespace()
>> + && isNumberOrDot(tokens[i + 2].getToken())) {
>> + msg = null;
>> + }
>> + } else if (token.equals(";")) {
>> + msg = messages.getString("no_space_before_semicolon");
>> + suggestionText = ";";
>> + fixLen = 1;
>> + } else if (token.equals("%")) {
>> + msg = messages.getString("no_space_before_percentage");
>> + suggestionText = "%";
>> + fixLen = 1;
>> + }
>> + }
>> + if (msg != null) {
>> + final int fromPos = tokens[i - 1].getStartPos();
>> + final int toPos = tokens[i - 1].getStartPos() + fixLen + prevLen;
>> + // TODO: add some good short comment here
>> + final RuleMatch ruleMatch = new RuleMatch(this, fromPos, toPos,
>> msg);
>> + ruleMatch.setSuggestedReplacement(suggestionText);
>> + ruleMatches.add(ruleMatch);
>> + }
>> + prevPrevToken = prevToken;
>> + prevToken = token;
>> + prevWhite = isWhitespace && !tokens[i].isFieldCode(); //OOo code
>> before comma/dot
>> + prevLen = tokens[i].getToken().length();
>> + }
>> +
>> + return toRuleMatchArray(ruleMatches);
>> + }
>> +
>> + static boolean isNotQuoteOrHyphen(final String str) {
>> + if (str.length() == 1) {
>> + final char c = str.charAt(0);
>> + if (c =='\'' || c == '-' || c == '”'
>> + || c =='’' || c == '"' || c == '“'
>> + || c == ',') {
>> + return false;
>> + }
>> + } else {
>> + return containsNoNumber(str);
>> + }
>> + return true;
>> + }
>> +
>> + static boolean isNumberOrDot(final String str) {
>> + final char c = str.charAt(0);
>> + return (c == '.' || Character.isDigit(c));
>> + }
>> +
>> + static boolean isLeftBracket(final String str) {
>> + if (str.length() == 0) {
>> + return false;
>> + }
>> + final char c = str.charAt(0);
>> + return (c == '(' || c == '[' || c == '{');
>> + }
>> +
>> + static boolean isRightBracket(final String str) {
>> + if (str.length() == 0) {
>> + return false;
>> + }
>> + final char c = str.charAt(0);
>> + return (c == ')' || c == ']' || c == '}');
>> + }
>> +
>> + static boolean containsNoNumber(final String str) {
>> + for (int i = 0; i < str.length(); i++) {
>> + if (Character.isDigit(str.charAt(i))) {
>> + return false;
>> + }
>> + }
>> + return true;
>> + }
>> +
>> + @Override
>> + public void reset() {
>> + // nothing
>> + }
>> +
>> +}
>> Index:
>> languagetool-language-modules/it/src/main/resources/org/languagetool/MessagesBundle_it.properties
>> ===================================================================
>> ---
>> languagetool-language-modules/it/src/main/resources/org/languagetool/MessagesBundle_it.properties
>> (revision 9815)
>> +++
>> languagetool-language-modules/it/src/main/resources/org/languagetool/MessagesBundle_it.properties
>> (working copy)
>> @@ -272,3 +272,17 @@
>> tray_menu_enable_server = Avviare il servizio HTTP.
>>
>> tray_tooltip_server_running = LanguageTool (servizio HTTP attivo)
>> +
>> +
>> +# 3607406 +
>> +
>> +no_space_before_colon = Non inserire uno spazio prima dei due punti
>> +
>> +no_space_before_semicolon = Non inserire uno spazio prima del punto e
>> virgola
>> +
>> +no_space_before_percentage = Non inserire uno spazio prima del segno di
>> percentuale
>> +
>> +desc_whitespace_before_punctuation = Utilizzo dello spazio prima di : ; %
>> +
>> +
>> +# 3607406 -
>> Index:
>> languagetool-language-modules/it/src/main/java/org/languagetool/language/Italian.java
>> ===================================================================
>> ---
>> languagetool-language-modules/it/src/main/java/org/languagetool/language/Italian.java
>> (revision 9815)
>> +++
>> languagetool-language-modules/it/src/main/java/org/languagetool/language/Italian.java
>> (working copy)
>> @@ -30,6 +30,10 @@
>> import org.languagetool.rules.WhitespaceRule;
>> import org.languagetool.rules.WordRepeatRule;
>> import org.languagetool.rules.it.MorfologikItalianSpellerRule;
>> +// 3607406 +
>> +import org.languagetool.rules.WhitespaceBeforePunctuationRule;
>> +// 3607406 -
>> +
>> import org.languagetool.tagging.Tagger;
>> import org.languagetool.tagging.it.ItalianTagger;
>>
>> @@ -79,6 +83,9 @@
>> @Override
>> public List<Class<? extends Rule>> getRelevantRules() {
>> return Arrays.asList(
>> +// 3607406 +
>> + WhitespaceBeforePunctuationRule.class,
>> +// 3607406 -
>> CommaWhitespaceRule.class,
>> DoublePunctuationRule.class,
>> GenericUnpairedBracketsRule.class,
>> Index:
>> languagetool-core/src/main/resources/org/languagetool/MessagesBundle.properties
>> ===================================================================
>> ---
>> languagetool-core/src/main/resources/org/languagetool/MessagesBundle.properties
>> (revision 9815)
>> +++
>> languagetool-core/src/main/resources/org/languagetool/MessagesBundle.properties
>> (working copy)
>> @@ -73,6 +73,12 @@
>>
>> desc_spelling_short = Spelling mistake
>>
>> +# 3607406 +
>> +
>> +desc_whitespace_before_punctuation = Use of whitespace before colon,
>> semicolon and percentage.
>> +
>> +# 3607406 -
>> +
>> double_dots_short = Two consecutive dots
>>
>> double_commas_short = Two consecutive comma
>> @@ -203,6 +209,16 @@
>>
>> no_space_before_dot = Don't put a space before the full stop
>>
>> +# 3607406 +
>> +
>> +no_space_before_colon = Don't put a space before the colon
>> +
>> +no_space_before_semicolon = Don't put a space before the semicolon
>> +
>> +no_space_before_percentage = Don't put a space before the percentage
>> +
>> +# 3607406 -
>> +
>> pl = Polish
>>
>> repetition = Possible typo: you repeated a word
>>
>>
>> On Apr 5, 2013, at 11:29 AM, Dominique Pellé wrote:
>>
>> > It's not entirely language independent. At least French does not
>> > follow those rules. French typography rules use a narrow non-break
>> > space before colon, semi-colon, question mark and exclamation mark.
>> >
>> > There is an easy rule of thumb to remember where to put a space in
>> > French: all punctuation characters that require to raise the pen to draw
>> > them (? ! ; : %) require a space before them. All punctuation characters
>> > that do not require to raise the pen to draw them should not have a
>> > space before them (dot, comma).
>> >
>> > But even if it's a common Java rule, it does not have to be enabled
>> > for French so that would be fine.
>> >
>> > Regards
>> > Dominique
>> >
>> > R.J. Baars <[email protected]> wrote:
>> >
>> >> I guess this is language-independent, so it should be fixed in the same
>> >> rule as no space before comma.
>> >>
>> >> Ruud
>> >>
>> >>> All,
>> >>>
>> >>> I could fix this bug by coding the following rule within the Italian
>> >>> grammar.xml (I also added a check for semicolon… I know, I could have
>> >>> used
>> >>> a reg_exp :-) ).
>> >>>
>> >>> The question is, would this be the correct/acceptable way of handling
>> >>> this
>> >>> bug or should it be handled differently (a java rule) and/or for a wider
>> >>> set of languages?
>> >>>
>> >>> Ciao
>> >>>
>> >>> Paolo
>> >>>
>> >>>
>> >>> <rulegroup name="spazio prima di : o di ;" id="GR_09_005">
>> >>> <rule>
>> >>> <pattern>
>> >>> <token></token>
>> >>> <token spacebefore="yes">:</token>
>> >>> </pattern>
>> >>> <message>Non lasciare uno spazio prima dei
>> >>> due punti:
>> >>> <suggestion><match no="1"></match>:</suggestion>.</message>
>> >>> <example type="incorrect">Comprammo tanti
>> >>> <marker>regali :</marker>
>> >>> bambole, libri, vestiti.</example>
>> >>> <example type="correct">Comprammo tanti
>> >>> <marker>regali:</marker>
>> >>> bambole, libri, vestiti.</example>
>> >>> </rule>
>> >>> <rule>
>> >>> <pattern>
>> >>> <token></token>
>> >>> <token spacebefore="yes">;</token>
>> >>> </pattern>
>> >>> <message>Non lasciare uno spazio prima del
>> >>> punto e virgola:
>> >>> <suggestion><match no="1"></match>;</suggestion>.</message>
>> >>> <example type="incorrect">Gli venne
>> >>> <marker>sonno ;</marker> e rimandò
>> >>> all'indomani.</example>
>> >>> <example type="correct">Gli venne
>> >>> <marker>sonno;</marker> e rimandò
>> >>> all'indomani.</example>
>> >>> </rule>
>> >>> </rulegroup>
>> >>>
>> >>> ------------------------------------------------------------------------------
>> >>> Minimize network downtime and maximize team effectiveness.
>> >>> Reduce network management and security costs.Learn how to hire
>> >>> the most talented Cisco Certified professionals. Visit the
>> >>> Employer Resources Portal
>> >>> http://www.cisco.com/web/learning/employer_resources/index.html
>> >>> _______________________________________________
>> >>> Languagetool-devel mailing list
>> >>> [email protected]
>> >>> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
>> >>>
>> >>
>> >>
>> >>
>> >> ------------------------------------------------------------------------------
>> >> Minimize network downtime and maximize team effectiveness.
>> >> Reduce network management and security costs.Learn how to hire
>> >> the most talented Cisco Certified professionals. Visit the
>> >> Employer Resources Portal
>> >> http://www.cisco.com/web/learning/employer_resources/index.html
>> >> _______________________________________________
>> >> Languagetool-devel mailing list
>> >> [email protected]
>> >> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
>> >
>> > ------------------------------------------------------------------------------
>> > Minimize network downtime and maximize team effectiveness.
>> > Reduce network management and security costs.Learn how to hire
>> > the most talented Cisco Certified professionals. Visit the
>> > Employer Resources Portal
>> > http://www.cisco.com/web/learning/employer_resources/index.html
>> > _______________________________________________
>> > Languagetool-devel mailing list
>> > [email protected]
>> > https://lists.sourceforge.net/lists/listinfo/languagetool-devel
>>
>>
>> ------------------------------------------------------------------------------
>> Minimize network downtime and maximize team effectiveness.
>> Reduce network management and security costs.Learn how to hire
>> the most talented Cisco Certified professionals. Visit the
>> Employer Resources Portal
>> http://www.cisco.com/web/learning/employer_resources/index.html
>> _______________________________________________
>> Languagetool-devel mailing list
>> [email protected]
>> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
>
> ------------------------------------------------------------------------------
> Minimize network downtime and maximize team effectiveness.
> Reduce network management and security costs.Learn how to hire
> the most talented Cisco Certified professionals. Visit the
> Employer Resources Portal
> http://www.cisco.com/web/learning/employer_resources/index.html
> _______________________________________________
> Languagetool-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
------------------------------------------------------------------------------
Minimize network downtime and maximize team effectiveness.
Reduce network management and security costs.Learn how to hire
the most talented Cisco Certified professionals. Visit the
Employer Resources Portal
http://www.cisco.com/web/learning/employer_resources/index.html
_______________________________________________
Languagetool-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-devel