Please enable this rule for Slovenian as well.

Lp, m.


2013/4/5 Paolo Bianchini <[email protected]>

> OK so I have a patch that adds another Java rule and uses it for the
> Italian language module.
>
> I tested the rules against Wikipedia for Italian and got no false alarms,
> same goes for several other languages such as Spanish and German.
>
> English has a couple of false alarms in bibliography entries.
>
> Let me know what you think.
>
> Ciao
>
> Paolo
>
> ### Eclipse Workspace Patch 1.0
> #P languagetool
> Index:
> languagetool-core/src/main/java/org/languagetool/rules/WhitespaceBeforePunctuationRule.java
> ===================================================================
> ---
> languagetool-core/src/main/java/org/languagetool/rules/WhitespaceBeforePunctuationRule.java
> (revision 0)
> +++
> languagetool-core/src/main/java/org/languagetool/rules/WhitespaceBeforePunctuationRule.java
> (revision 0)
> @@ -0,0 +1,157 @@
> +/* LanguageTool, a natural language style checker
> + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
> + * USA
> + */
> +package org.languagetool.rules;
> +
> +import java.util.ArrayList;
> +import java.util.List;
> +import java.util.ResourceBundle;
> +
> +import org.languagetool.AnalyzedSentence;
> +import org.languagetool.AnalyzedTokenReadings;
> +import org.languagetool.tools.StringTools;
> +
> +/**
> + * A rule that matches several punctuation signs such as : ; and %
> preceded by whitespace.
> + *
> + * BUG ID 3607406: no space before semicolon
> + *
> + * @author Paolo Bianchini
> + */
> +
> +public class WhitespaceBeforePunctuationRule extends Rule {
> +
> +  public WhitespaceBeforePunctuationRule(final ResourceBundle messages) {
> +    super(messages);
> +    super.setCategory(new Category(messages.getString("category_misc")));
> +    setLocQualityIssueType("typographical");
> +  }
> +
> +  @Override
> +  public final String getId() {
> +    return "WHITESPACE_PUNCTUATION";
> +  }
> +
> +  @Override
> +  public final String getDescription() {
> +    return messages.getString("desc_whitespace_before_punctuation");
> +  }
> +
> +  @Override
> +  public final RuleMatch[] match(final AnalyzedSentence text) {
> +    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
> +    final AnalyzedTokenReadings[] tokens = text.getTokens();
> +    String prevToken = "";
> +    String prevPrevToken = "";
> +    boolean prevWhite = false;
> +    int prevLen = 0;
> +    for (int i = 0; i < tokens.length; i++) {
> +      final String token = tokens[i].getToken();
> +      final boolean isWhitespace = tokens[i].isWhitespace() ||
> StringTools.isNonBreakingWhitespace(token)
> +              || tokens[i].isFieldCode();
> +      String msg = null;
> +      int fixLen = 0;
> +      String suggestionText = null;
> +      if (prevWhite) {
> +        if (token.equals(":")) {
> +            msg = messages.getString("no_space_before_colon");
> +            suggestionText = ":";
> +            fixLen = 1;
> +            // exception case for figures such as " : 0"
> +            if (i + 2 < tokens.length
> +                && tokens[i + 1].isWhitespace()
> +               && isNumberOrDot(tokens[i + 2].getToken())) {
> +              msg = null;
> +            }
> +        } else if (token.equals(";")) {
> +            msg = messages.getString("no_space_before_semicolon");
> +            suggestionText = ";";
> +            fixLen = 1;
> +        } else if (token.equals("%")) {
> +            msg = messages.getString("no_space_before_percentage");
> +            suggestionText = "%";
> +            fixLen = 1;
> +        }
> +      }
> +      if (msg != null) {
> +        final int fromPos = tokens[i - 1].getStartPos();
> +        final int toPos = tokens[i - 1].getStartPos() + fixLen + prevLen;
> +        // TODO: add some good short comment here
> +        final RuleMatch ruleMatch = new RuleMatch(this, fromPos, toPos,
> msg);
> +        ruleMatch.setSuggestedReplacement(suggestionText);
> +        ruleMatches.add(ruleMatch);
> +      }
> +      prevPrevToken = prevToken;
> +      prevToken = token;
> +      prevWhite = isWhitespace && !tokens[i].isFieldCode(); //OOo code
> before comma/dot
> +      prevLen = tokens[i].getToken().length();
> +    }
> +
> +    return toRuleMatchArray(ruleMatches);
> +  }
> +
> +  static boolean isNotQuoteOrHyphen(final String str) {
> +    if (str.length() == 1) {
> +      final char c = str.charAt(0);
> +      if (c =='\'' || c == '-' || c == '”'
> +        || c =='’' || c == '"' || c == '“'
> +        || c == ',') {
> +        return false;
> +      }
> +    } else {
> +      return containsNoNumber(str);
> +    }
> +    return true;
> +  }
> +
> +  static boolean isNumberOrDot(final String str) {
> +    final char c = str.charAt(0);
> +    return (c == '.' || Character.isDigit(c));
> +  }
> +
> +  static boolean isLeftBracket(final String str) {
> +    if (str.length() == 0) {
> +      return false;
> +    }
> +    final char c = str.charAt(0);
> +    return (c == '(' || c == '[' || c == '{');
> +  }
> +
> +  static boolean isRightBracket(final String str) {
> +    if (str.length() == 0) {
> +      return false;
> +    }
> +    final char c = str.charAt(0);
> +    return (c == ')' || c == ']' || c == '}');
> +  }
> +
> +  static boolean containsNoNumber(final String str) {
> +    for (int i = 0; i < str.length(); i++) {
> +      if (Character.isDigit(str.charAt(i))) {
> +        return false;
> +      }
> +    }
> +    return true;
> +  }
> +
> +  @Override
> +  public void reset() {
> +    // nothing
> +  }
> +
> +}
> Index:
> languagetool-language-modules/it/src/main/resources/org/languagetool/MessagesBundle_it.properties
> ===================================================================
> ---
> languagetool-language-modules/it/src/main/resources/org/languagetool/MessagesBundle_it.properties
>   (revision 9815)
> +++
> languagetool-language-modules/it/src/main/resources/org/languagetool/MessagesBundle_it.properties
>   (working copy)
> @@ -272,3 +272,17 @@
>  tray_menu_enable_server = Avviare il servizio HTTP.
>
>  tray_tooltip_server_running = LanguageTool (servizio HTTP attivo)
> +
> +
> +# 3607406 +
> +
> +no_space_before_colon = Non inserire uno spazio prima dei due punti
> +
> +no_space_before_semicolon = Non inserire uno spazio prima del punto e
> virgola
> +
> +no_space_before_percentage = Non inserire uno spazio prima del segno di
> percentuale
> +
> +desc_whitespace_before_punctuation = Utilizzo dello spazio prima di : ; %
> +
> +
> +# 3607406 -
> Index:
> languagetool-language-modules/it/src/main/java/org/languagetool/language/Italian.java
> ===================================================================
> ---
> languagetool-language-modules/it/src/main/java/org/languagetool/language/Italian.java
>       (revision 9815)
> +++
> languagetool-language-modules/it/src/main/java/org/languagetool/language/Italian.java
>       (working copy)
> @@ -30,6 +30,10 @@
>  import org.languagetool.rules.WhitespaceRule;
>  import org.languagetool.rules.WordRepeatRule;
>  import org.languagetool.rules.it.MorfologikItalianSpellerRule;
> +// 3607406 +
> +import org.languagetool.rules.WhitespaceBeforePunctuationRule;
> +// 3607406 -
> +
>  import org.languagetool.tagging.Tagger;
>  import org.languagetool.tagging.it.ItalianTagger;
>
> @@ -79,6 +83,9 @@
>    @Override
>    public List<Class<? extends Rule>> getRelevantRules() {
>      return Arrays.asList(
> +// 3607406 +
> +               WhitespaceBeforePunctuationRule.class,
> +// 3607406 -
>              CommaWhitespaceRule.class,
>              DoublePunctuationRule.class,
>              GenericUnpairedBracketsRule.class,
> Index:
> languagetool-core/src/main/resources/org/languagetool/MessagesBundle.properties
> ===================================================================
> ---
> languagetool-core/src/main/resources/org/languagetool/MessagesBundle.properties
>     (revision 9815)
> +++
> languagetool-core/src/main/resources/org/languagetool/MessagesBundle.properties
>     (working copy)
> @@ -73,6 +73,12 @@
>
>  desc_spelling_short = Spelling mistake
>
> +# 3607406 +
> +
> +desc_whitespace_before_punctuation = Use of whitespace before colon,
> semicolon and percentage.
> +
> +# 3607406 -
> +
>  double_dots_short = Two consecutive dots
>
>  double_commas_short = Two consecutive comma
> @@ -203,6 +209,16 @@
>
>  no_space_before_dot = Don't put a space before the full stop
>
> +# 3607406 +
> +
> +no_space_before_colon = Don't put a space before the colon
> +
> +no_space_before_semicolon = Don't put a space before the semicolon
> +
> +no_space_before_percentage = Don't put a space before the percentage
> +
> +# 3607406 -
> +
>  pl = Polish
>
>  repetition = Possible typo: you repeated a word
>
>
> On Apr 5, 2013, at 11:29 AM, Dominique Pellé wrote:
>
> > It's not entirely language independent. At least French does not
> > follow those rules. French typography rules use a narrow non-break
> > space before colon, semi-colon, question mark and exclamation mark.
> >
> > There is an easy rule of thumb to remember where to put a space in
> > French: all punctuation characters that require to raise the pen to draw
> > them (? ! ; : %) require a space before them. All punctuation characters
> > that do not require to raise the pen to draw them should not have a
> > space before them (dot, comma).
> >
> > But even if it's a common Java rule, it does not have to be enabled
> > for French so that would be fine.
> >
> > Regards
> > Dominique
> >
> > R.J. Baars <[email protected]> wrote:
> >
> >> I guess this is language independent, so it should be fixed in the same rule as no space
> >> before comma.
> >>
> >> Ruud
> >>
> >>> All,
> >>>
> >>> I could fix this bug by coding the following rule within the Italian
> >>> grammar.xml (I also added a check for semicolon… I know, I could have
> used
> >>> a reg_exp :-) ).
> >>>
> >>> The question is, would this be the correct/acceptable way of handling
> this
> >>> bug or should it be handled differently (a java rule) and/or for a
> wider
> >>> set of languages?
> >>>
> >>> Ciao
> >>>
> >>> Paolo
> >>>
> >>>
> >>>              <rulegroup name="spazio prima di : o di ;" id="GR_09_005">
> >>>                  <rule>
> >>>                      <pattern>
> >>>                          <token></token>
> >>>                          <token spacebefore="yes">:</token>
> >>>                      </pattern>
> >>>                              <message>Non lasciare uno spazio prima
> dei due punti:
> >>> <suggestion><match no="1"></match>:</suggestion>.</message>
> >>>                              <example type="incorrect">Comprammo tanti
> <marker>regali :</marker>
> >>> bambole, libri, vestiti.</example>
> >>>                              <example type="correct">Comprammo tanti
> <marker>regali:</marker>
> >>> bambole, libri, vestiti.</example>
> >>>                      </rule>
> >>>                  <rule>
> >>>                      <pattern>
> >>>                          <token></token>
> >>>                          <token spacebefore="yes">;</token>
> >>>                      </pattern>
> >>>                              <message>Non lasciare uno spazio prima
> del punto e virgola:
> >>> <suggestion><match no="1"></match>;</suggestion>.</message>
> >>>                              <example type="incorrect">Gli venne
> <marker>sonno ;</marker> e rimandò
> >>> all'indomani.</example>
> >>>                              <example type="correct">Gli venne
> <marker>sonno;</marker> e rimandò
> >>> all'indomani.</example>
> >>>                      </rule>
> >>>              </rulegroup>
> >>>
> >>>
> ------------------------------------------------------------------------------
> >>> Minimize network downtime and maximize team effectiveness.
> >>> Reduce network management and security costs.Learn how to hire
> >>> the most talented Cisco Certified professionals. Visit the
> >>> Employer Resources Portal
> >>>
> http://www.cisco.com/web/learning/employer_resources/index.html_______________________________________________
> >>> Languagetool-devel mailing list
> >>> [email protected]
> >>> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
> >>>
> >>
> >>
> >>
> >>
> ------------------------------------------------------------------------------
> >> Minimize network downtime and maximize team effectiveness.
> >> Reduce network management and security costs.Learn how to hire
> >> the most talented Cisco Certified professionals. Visit the
> >> Employer Resources Portal
> >> http://www.cisco.com/web/learning/employer_resources/index.html
> >> _______________________________________________
> >> Languagetool-devel mailing list
> >> [email protected]
> >> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
> >
> >
> ------------------------------------------------------------------------------
> > Minimize network downtime and maximize team effectiveness.
> > Reduce network management and security costs.Learn how to hire
> > the most talented Cisco Certified professionals. Visit the
> > Employer Resources Portal
> > http://www.cisco.com/web/learning/employer_resources/index.html
> > _______________________________________________
> > Languagetool-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/languagetool-devel
>
>
>
> ------------------------------------------------------------------------------
> Minimize network downtime and maximize team effectiveness.
> Reduce network management and security costs.Learn how to hire
> the most talented Cisco Certified professionals. Visit the
> Employer Resources Portal
> http://www.cisco.com/web/learning/employer_resources/index.html
> _______________________________________________
> Languagetool-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
>
------------------------------------------------------------------------------
Minimize network downtime and maximize team effectiveness.
Reduce network management and security costs.Learn how to hire 
the most talented Cisco Certified professionals. Visit the 
Employer Resources Portal
http://www.cisco.com/web/learning/employer_resources/index.html
_______________________________________________
Languagetool-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-devel

Reply via email to