Revision: 7053
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7053&view=rev
Author:   dominikoeo
Date:     2012-05-25 18:50:09 +0000 (Fri, 25 May 2012)
Log Message:
-----------
[fr] fixed false positive: the Java French rule incorrectly
     indicated a missing space before the colon in URLs
     such as http://www.  

Modified Paths:
--------------
    
trunk/JLanguageTool/src/java/org/languagetool/rules/fr/QuestionWhitespaceRule.java

Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/fr/QuestionWhitespaceRule.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/fr/QuestionWhitespaceRule.java  2012-05-25 18:01:37 UTC (rev 7052)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/fr/QuestionWhitespaceRule.java  2012-05-25 18:50:09 UTC (rev 7053)
@@ -21,6 +21,8 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.ResourceBundle;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.languagetool.AnalyzedSentence;
 import org.languagetool.AnalyzedTokenReadings;
@@ -38,6 +40,10 @@
  */
 public class QuestionWhitespaceRule extends FrenchRule {
 
+  // Pattern used to avoid false positive when signaling missing
+  // space before and after colon ':' in URL with common schemes.
+  private static final Pattern patternUrl = Pattern.compile("^(file|s?ftp|finger|git|gopher|hdl|https?|shttp|imap|mailto|mms|nntp|s?news(post|reply)?|prospero|rsync|rtspu|sips?|svn|svn\\+ssh|telnet|wais)$");
+
   public QuestionWhitespaceRule(final ResourceBundle messages) {
     // super(messages);
     super.setCategory(new Category(messages.getString("category_misc")));
@@ -93,7 +99,7 @@
           fixLen = 1;
         }        
       } else {
-        // Stricly speaking, the character before ?!;: should be an
+        // Strictly speaking, the character before ?!;: should be an
         // "espace fine insécable" (U+202f).  In practise, an
         // "espace insécable" (U+00a0) is also often used. Let's accept both.
         if (token.equals("?") && !prevToken.equals("!")
@@ -114,12 +120,16 @@
           // non-breaking space
           suggestionText = prevToken + " ;";
           fixLen = 1;
-        } else if (token.equals(":") 
+        } else if (token.equals(":")
             && !prevToken.equals("\u00a0") && !prevToken.equals("\u202f")) {
-          msg = "Deux-points précédés d'une espace fine insécable.";
-          // non-breaking space
-          suggestionText = prevToken + " :";
-          fixLen = 1;
+          // Avoid false positive for URL like http://www.languagetool.org.
+          final Matcher matcherUrl = patternUrl.matcher(prevToken);
+          if (!matcherUrl.find()) {
+            msg = "Deux-points précédés d'une espace fine insécable.";
+            // non-breaking space
+            suggestionText = prevToken + " :";
+            fixLen = 1;
+          }
         } else if (token.equals("»")
             && !prevToken.equals("\u00a0") && !prevToken.equals("\u202f")) {
           msg = "Le guillemet fermant est précédé d'une espace fine insécable.";

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to