Please enable this rule for Slovenian as well.
Lp, m.
2013/4/5 Paolo Bianchini <[email protected]>
> OK so I have a patch that adds another Java rule and uses it for the
> Italian language module.
>
> I tested the rules against Wikipedia for Italian and got no false alarms;
> the same goes for several other languages such as Spanish and German.
>
> English has a couple of false alarms in bibliography entries.
>
> Let me know what you think.
>
> Ciao
>
> Paolo
>
> ### Eclipse Workspace Patch 1.0
> #P languagetool
> Index:
> languagetool-core/src/main/java/org/languagetool/rules/WhitespaceBeforePunctuationRule.java
> ===================================================================
> ---
> languagetool-core/src/main/java/org/languagetool/rules/WhitespaceBeforePunctuationRule.java
> (revision 0)
> +++
> languagetool-core/src/main/java/org/languagetool/rules/WhitespaceBeforePunctuationRule.java
> (revision 0)
> @@ -0,0 +1,157 @@
> +/* LanguageTool, a natural language style checker
> + * Copyright (C) 2005 Daniel Naber (http://www.danielnaber.de)
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
> + * USA
> + */
> +package org.languagetool.rules;
> +
> +import java.util.ArrayList;
> +import java.util.List;
> +import java.util.ResourceBundle;
> +
> +import org.languagetool.AnalyzedSentence;
> +import org.languagetool.AnalyzedTokenReadings;
> +import org.languagetool.tools.StringTools;
> +
> +/**
> + * A rule that matches several punctuation signs such as : ; and %
> preceded by whitespace.
> + *
> + * BUG ID 3607406: no space before semicolon
> + *
> + * @author Paolo Bianchini
> + */
> +
> +public class WhitespaceBeforePunctuationRule extends Rule {
> +
> + public WhitespaceBeforePunctuationRule(final ResourceBundle messages) {
> + super(messages);
> + super.setCategory(new Category(messages.getString("category_misc")));
> + setLocQualityIssueType("typographical");
> + }
> +
> + @Override
> + public final String getId() {
> + return "WHITESPACE_PUNCTUATION";
> + }
> +
> + @Override
> + public final String getDescription() {
> + return messages.getString("desc_whitespace_before_punctuation");
> + }
> +
> + @Override
> + public final RuleMatch[] match(final AnalyzedSentence text) {
> + final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
> + final AnalyzedTokenReadings[] tokens = text.getTokens();
> + String prevToken = "";
> + String prevPrevToken = "";
> + boolean prevWhite = false;
> + int prevLen = 0;
> + for (int i = 0; i < tokens.length; i++) {
> + final String token = tokens[i].getToken();
> + final boolean isWhitespace = tokens[i].isWhitespace() ||
> StringTools.isNonBreakingWhitespace(token)
> + || tokens[i].isFieldCode();
> + String msg = null;
> + int fixLen = 0;
> + String suggestionText = null;
> + if (prevWhite) {
> + if (token.equals(":")) {
> + msg = messages.getString("no_space_before_colon");
> + suggestionText = ":";
> + fixLen = 1;
> + // exception case for figures such as " : 0"
> + if (i + 2 < tokens.length
> + && tokens[i + 1].isWhitespace()
> + && isNumberOrDot(tokens[i + 2].getToken())) {
> + msg = null;
> + }
> + } else if (token.equals(";")) {
> + msg = messages.getString("no_space_before_semicolon");
> + suggestionText = ";";
> + fixLen = 1;
> + } else if (token.equals("%")) {
> + msg = messages.getString("no_space_before_percentage");
> + suggestionText = "%";
> + fixLen = 1;
> + }
> + }
> + if (msg != null) {
> + final int fromPos = tokens[i - 1].getStartPos();
> + final int toPos = tokens[i - 1].getStartPos() + fixLen + prevLen;
> + // TODO: add some good short comment here
> + final RuleMatch ruleMatch = new RuleMatch(this, fromPos, toPos,
> msg);
> + ruleMatch.setSuggestedReplacement(suggestionText);
> + ruleMatches.add(ruleMatch);
> + }
> + prevPrevToken = prevToken;
> + prevToken = token;
> + prevWhite = isWhitespace && !tokens[i].isFieldCode(); //OOo code
> before comma/dot
> + prevLen = tokens[i].getToken().length();
> + }
> +
> + return toRuleMatchArray(ruleMatches);
> + }
> +
> + static boolean isNotQuoteOrHyphen(final String str) {
> + if (str.length() == 1) {
> + final char c = str.charAt(0);
> + if (c =='\'' || c == '-' || c == '”'
> + || c =='’' || c == '"' || c == '“'
> + || c == ',') {
> + return false;
> + }
> + } else {
> + return containsNoNumber(str);
> + }
> + return true;
> + }
> +
> + static boolean isNumberOrDot(final String str) {
> + final char c = str.charAt(0);
> + return (c == '.' || Character.isDigit(c));
> + }
> +
> + static boolean isLeftBracket(final String str) {
> + if (str.length() == 0) {
> + return false;
> + }
> + final char c = str.charAt(0);
> + return (c == '(' || c == '[' || c == '{');
> + }
> +
> + static boolean isRightBracket(final String str) {
> + if (str.length() == 0) {
> + return false;
> + }
> + final char c = str.charAt(0);
> + return (c == ')' || c == ']' || c == '}');
> + }
> +
> + static boolean containsNoNumber(final String str) {
> + for (int i = 0; i < str.length(); i++) {
> + if (Character.isDigit(str.charAt(i))) {
> + return false;
> + }
> + }
> + return true;
> + }
> +
> + @Override
> + public void reset() {
> + // nothing
> + }
> +
> +}
> Index:
> languagetool-language-modules/it/src/main/resources/org/languagetool/MessagesBundle_it.properties
> ===================================================================
> ---
> languagetool-language-modules/it/src/main/resources/org/languagetool/MessagesBundle_it.properties
> (revision 9815)
> +++
> languagetool-language-modules/it/src/main/resources/org/languagetool/MessagesBundle_it.properties
> (working copy)
> @@ -272,3 +272,17 @@
> tray_menu_enable_server = Avviare il servizio HTTP.
>
> tray_tooltip_server_running = LanguageTool (servizio HTTP attivo)
> +
> +
> +# 3607406 +
> +
> +no_space_before_colon = Non inserire uno spazio prima dei due punti
> +
> +no_space_before_semicolon = Non inserire uno spazio prima del punto e
> virgola
> +
> +no_space_before_percentage = Non inserire uno spazio prima del segno di
> percentuale
> +
> +desc_whitespace_before_punctuation = Utilizzo dello spazio prima di : ; %
> +
> +
> +# 3607406 -
> Index:
> languagetool-language-modules/it/src/main/java/org/languagetool/language/Italian.java
> ===================================================================
> ---
> languagetool-language-modules/it/src/main/java/org/languagetool/language/Italian.java
> (revision 9815)
> +++
> languagetool-language-modules/it/src/main/java/org/languagetool/language/Italian.java
> (working copy)
> @@ -30,6 +30,10 @@
> import org.languagetool.rules.WhitespaceRule;
> import org.languagetool.rules.WordRepeatRule;
> import org.languagetool.rules.it.MorfologikItalianSpellerRule;
> +// 3607406 +
> +import org.languagetool.rules.WhitespaceBeforePunctuationRule;
> +// 3607406 -
> +
> import org.languagetool.tagging.Tagger;
> import org.languagetool.tagging.it.ItalianTagger;
>
> @@ -79,6 +83,9 @@
> @Override
> public List<Class<? extends Rule>> getRelevantRules() {
> return Arrays.asList(
> +// 3607406 +
> + WhitespaceBeforePunctuationRule.class,
> +// 3607406 -
> CommaWhitespaceRule.class,
> DoublePunctuationRule.class,
> GenericUnpairedBracketsRule.class,
> Index:
> languagetool-core/src/main/resources/org/languagetool/MessagesBundle.properties
> ===================================================================
> ---
> languagetool-core/src/main/resources/org/languagetool/MessagesBundle.properties
> (revision 9815)
> +++
> languagetool-core/src/main/resources/org/languagetool/MessagesBundle.properties
> (working copy)
> @@ -73,6 +73,12 @@
>
> desc_spelling_short = Spelling mistake
>
> +# 3607406 +
> +
> +desc_whitespace_before_punctuation = Use of whitespace before colon,
> semicolon and percentage.
> +
> +# 3607406 -
> +
> double_dots_short = Two consecutive dots
>
> double_commas_short = Two consecutive comma
> @@ -203,6 +209,16 @@
>
> no_space_before_dot = Don't put a space before the full stop
>
> +# 3607406 +
> +
> +no_space_before_colon = Don't put a space before the colon
> +
> +no_space_before_semicolon = Don't put a space before the semicolon
> +
> +no_space_before_percentage = Don't put a space before the percentage
> +
> +# 3607406 -
> +
> pl = Polish
>
> repetition = Possible typo: you repeated a word
>
>
> On Apr 5, 2013, at 11:29 AM, Dominique Pellé wrote:
>
> > It's not entirely language independent. At least French does not
> > follow those rules. French typography rules use a narrow non-break
> > space before colon, semi-colon, question mark and exclamation mark.
> >
> > There is an easy rule of thumb to remember where to put a space in
> > French: all punctuation characters that require raising the pen to draw
> > them (? ! ; : %) require a space before them. All punctuation characters
> > that do not require raising the pen to draw them should not have a
> > space before them (dot, comma).
> >
> > But even if it's a common Java rule, it does not have to be enabled
> > for French so that would be fine.
> >
> > Regards
> > Dominique
> >
> > R.J. Baars <[email protected]> wrote:
> >
> >> I guess this is language-independent, so it should be fixed in the same
> >> rule as "no space before comma".
> >>
> >> Ruud
> >>
> >>> All,
> >>>
> >>> I could fix this bug by coding the following rule within the Italian
> >>> grammar.xml (I also added a check for semicolon… I know, I could have used
> >>> a reg_exp :-) ).
> >>>
> >>> The question is, would this be the correct/acceptable way of handling this
> >>> bug or should it be handled differently (a Java rule) and/or for a wider
> >>> set of languages?
> >>>
> >>> Ciao
> >>>
> >>> Paolo
> >>>
> >>>
> >>> <rulegroup name="spazio prima di : o di ;" id="GR_09_005">
> >>> <rule>
> >>> <pattern>
> >>> <token></token>
> >>> <token spacebefore="yes">:</token>
> >>> </pattern>
> >>> <message>Non lasciare uno spazio prima
> dei due punti:
> >>> <suggestion><match no="1"></match>:</suggestion>.</message>
> >>> <example type="incorrect">Comprammo tanti
> <marker>regali :</marker>
> >>> bambole, libri, vestiti.</example>
> >>> <example type="correct">Comprammo tanti
> <marker>regali:</marker>
> >>> bambole, libri, vestiti.</example>
> >>> </rule>
> >>> <rule>
> >>> <pattern>
> >>> <token></token>
> >>> <token spacebefore="yes">;</token>
> >>> </pattern>
> >>> <message>Non lasciare uno spazio prima
> del punto e virgola:
> >>> <suggestion><match no="1"></match>;</suggestion>.</message>
> >>> <example type="incorrect">Gli venne
> <marker>sonno ;</marker> e rimandò
> >>> all'indomani.</example>
> >>> <example type="correct">Gli venne
> <marker>sonno;</marker> e rimandò
> >>> all'indomani.</example>
> >>> </rule>
> >>> </rulegroup>
> >>>
> >>>
> ------------------------------------------------------------------------------
> >>> Minimize network downtime and maximize team effectiveness.
> >>> Reduce network management and security costs.Learn how to hire
> >>> the most talented Cisco Certified professionals. Visit the
> >>> Employer Resources Portal
> >>>
> http://www.cisco.com/web/learning/employer_resources/index.html_______________________________________________
> >>> Languagetool-devel mailing list
> >>> [email protected]
> >>> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
> >>>
> >>
> >>
> >>
> >>
> ------------------------------------------------------------------------------
> >> Minimize network downtime and maximize team effectiveness.
> >> Reduce network management and security costs.Learn how to hire
> >> the most talented Cisco Certified professionals. Visit the
> >> Employer Resources Portal
> >> http://www.cisco.com/web/learning/employer_resources/index.html
> >> _______________________________________________
> >> Languagetool-devel mailing list
> >> [email protected]
> >> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
> >
> >
> ------------------------------------------------------------------------------
> > Minimize network downtime and maximize team effectiveness.
> > Reduce network management and security costs.Learn how to hire
> > the most talented Cisco Certified professionals. Visit the
> > Employer Resources Portal
> > http://www.cisco.com/web/learning/employer_resources/index.html
> > _______________________________________________
> > Languagetool-devel mailing list
> > [email protected]
> > https://lists.sourceforge.net/lists/listinfo/languagetool-devel
>
>
>
> ------------------------------------------------------------------------------
> Minimize network downtime and maximize team effectiveness.
> Reduce network management and security costs.Learn how to hire
> the most talented Cisco Certified professionals. Visit the
> Employer Resources Portal
> http://www.cisco.com/web/learning/employer_resources/index.html
> _______________________________________________
> Languagetool-devel mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/languagetool-devel
>
------------------------------------------------------------------------------
Minimize network downtime and maximize team effectiveness.
Reduce network management and security costs.Learn how to hire
the most talented Cisco Certified professionals. Visit the
Employer Resources Portal
http://www.cisco.com/web/learning/employer_resources/index.html
_______________________________________________
Languagetool-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-devel