Revision: 8547
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=8547&view=rev
Author:   jaumeortola
Date:     2012-12-15 00:16:49 +0000 (Sat, 15 Dec 2012)
Log Message:
-----------
[ca] Change in CatalanWordTokenizer:
The hyphen is (again) treated as a word separator, because hyphens are very 
commonly used this way in web pages. 
As a consequence, some errors cannot be detected if the rule that replaces 
hyphens with dashes is disabled.

Modified Paths:
--------------
    
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
    trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
  2012-12-14 23:02:14 UTC (rev 8546)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/ca/CatalanWordTokenizer.java
  2012-12-15 00:16:49 UTC (rev 8547)
@@ -80,13 +80,15 @@
        /**
         * @param text Text to tokenize
         * @return List of tokens.
-        *         Note: a special string ##CA_APOS## is used to replace 
apostrophes
-        *         during tokenizing (as in Dutch).
+        *         Note: a special string ##CA_APOS## is used to replace 
apostrophes,
+        *         and ##CA_HYPHEN## to replace hyphens.
         */
        @Override
        public List<String> tokenize(final String text) {
                final List<String> l = new ArrayList<String>();
-               final StringTokenizer st = new 
StringTokenizer(text.replaceAll("([\\p{L}])['’]([\\p{L}])", "$1##CA_APOS##$2"),
+               final StringTokenizer st = new 
StringTokenizer(text.replaceAll("([\\p{L}])['’]([\\p{L}])", "$1##CA_APOS##$2")
+                               .replaceAll("([\\p{L}])-([\\p{L}])-([\\p{L}])", 
"$1##CA_HYPHEN##$2##CA_HYPHEN##$3")  //it's necessary for words like 
"vint-i-quatre" 
+                               .replaceAll("([\\p{L}])-([\\p{L}])", 
"$1##CA_HYPHEN##$2"),
                                "\u0020\u00A0\u115f\u1160\u1680" 
                                                + 
"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007"
                                                + 
"\u2008\u2009\u200A\u200B\u200c\u200d\u200e\u200f"
@@ -94,12 +96,12 @@
                                                + 
"\u2028\u2029\u202a\u202b\u202c\u202d\u202e\u202f"
                                                + 
"\u205F\u2060\u2061\u2062\u2063\u206A\u206b\u206c\u206d"
                                                + 
"\u206E\u206F\u3000\u3164\ufeff\uffa0\ufff9\ufffa\ufffb"
-                                               + 
",.;()[]{}<>!?:/\\\"'«»„”“‘’`´…¿¡\t\n\r", true);
+                                               + 
",.;()[]{}<>!?:/\\\"'«»„”“‘’`´…¿¡-\t\n\r", true);
                String s;
                String groupStr;
                
                while (st.hasMoreElements()) {
-                       s=st.nextToken().replace("##CA_APOS##", "'");
+                       s=st.nextToken().replaceAll("##CA_APOS##", 
"'").replaceAll("##CA_HYPHEN##", "-");
                        Matcher matcher=null;
                        boolean matchFound=false;
                        int j=0;

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml    
    2012-12-14 23:02:14 UTC (rev 8546)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/ca/grammar.xml    
    2012-12-15 00:16:49 UTC (rev 8547)
@@ -7381,7 +7381,7 @@
                 </pattern>
                 <message>Combinació incorrecta de pronoms febles.</message>
                 <short>Combinació incorrecta de pronoms.</short>
-                <example type="incorrect">Portem 
<marker>-los-els-los</marker>.</example>
+                <example 
type="incorrect">Portem<marker>-los-els-los</marker>.</example>
                 <example type="correct">Portem-los-els.</example>
             </rule>
             <rule>
@@ -7731,7 +7731,7 @@
                 </pattern>
                 <message>Forma de pronom feble incorrecta.</message>
                 <short>Forma de pronom feble incorrecta.</short>
-                <example type="incorrect">Portar 
<marker>-els</marker>.</example>
+                <example 
type="incorrect">Portar<marker>-els</marker>.</example>
                 <example type="correct">Portar-los.</example>
             </rule>
             <rule>
@@ -7773,7 +7773,7 @@
                 </pattern>
                 <message>Cal apostrofar: <suggestion><match no="2" 
regexp_match="-[e]?([^e])e?" regexp_replace="-$1e"></match><match no="3" 
regexp_match="-e?([^e]{1,2})e?" 
regexp_replace="'$1"></match></suggestion>.</message>
                 <short>Cal apostrofar.</short>
-                <example type="incorrect">Emporta 
<marker>-te</marker>-el.</example>
+                <example 
type="incorrect">Emporta<marker>-te</marker>-el.</example>
                 <example type="correct">Emporta-te'l.</example>
             </rule>
             <rule>
@@ -8523,51 +8523,55 @@
             <example type="incorrect">Ara mateix<marker>...</marker></example>
             <example type="correct">Ara mateix…</example>
         </rule>
-        <rulegroup id="GUIONET_SOLT" name="Substitueix un guionet solt per 
guió llarg" default="off">
+        <rulegroup id="GUIONET_GUIO" name="Substitueix un guionet solt per 
guió llarg" default="off">
             <rule>
                 <pattern>
-                    <token spacebefore="yes" regexp="yes">-[^-]+<exception 
inflected="yes">numeral</exception></token>
+                    <marker>
+                        <token spacebefore="yes">-</token>
+                    </marker>
+                    <token spacebefore="no" regexp="yes">[^-].*</token>
                 </pattern>
-                <message>Cal substituir el guionet. <suggestion><match no="1" 
regexp_match=".(.+)" regexp_replace="—$1"></match></suggestion>.</message>
+                <message>Cal substituir el guionet per guió: 
<suggestion>—</suggestion>.</message>
                 <short>Cal substituir el guionet</short>
-                <example type="incorrect">Vine 
<marker>-digué</marker>.</example>
-                <example type="correct">Vine <marker>—digué</marker>.</example>
+                <example type="incorrect">Vine 
<marker>-</marker>digué.</example>
+                <example type="correct">Vine —digué.</example>
             </rule>
             <rule>
                 <pattern>
+                    <token regexp="yes">.*[^-]</token>
                     <marker>
-                        <token regexp="yes">[^-]+-</token>
+                        <token spacebefore="no">-</token>
                     </marker>
                 </pattern>
-                <message>Cal substituir el guionet. <suggestion><match no="1" 
regexp_match="(.+)." regexp_replace="$1—"></match></suggestion>.</message>
+                <message>Cal substituir el guionet per guió: 
<suggestion>—</suggestion>.</message>
                 <short>Cal substituir el guionet</short>
-                <example type="incorrect"><marker>digué-</marker> 
vine.</example>
-                <example type="correct"><marker> digué—</marker> vine</example>
+                <example type="incorrect">digué<marker>-</marker> 
vine.</example>
+                <example type="correct">digué— vine</example>
             </rule>
             <rule>
                 <pattern>
+                    <token regexp="yes">.*[^-]</token>
                     <marker>
-                        <token regexp="yes">[^-]+-</token>
+                        <token spacebefore="no">-</token>
                     </marker>
                     <token regexp="yes">;|,|\.</token>
                 </pattern>
-                <message>Cal substituir el guionet. <suggestion><match no="1" 
regexp_match="(.+)." regexp_replace="$1—"></match></suggestion>.</message>
+                <message>Cal substituir el guionet per guió: 
<suggestion>—</suggestion>.</message>
                 <short>Cal substituir el guionet</short>
-                <example type="incorrect"><marker>digué-</marker>, 
vine.</example>
-                <example type="correct"><marker> digué—</marker>, 
vine</example>
+                <example type="incorrect">digué<marker>-</marker>, 
vine.</example>
+                <example type="correct">digué—, vine</example>
             </rule>
             <rule>
                 <pattern>
                     <marker>
-                        <token regexp="yes">[-]+</token>
+                        <token>-</token>
+                        <token>-</token>
                     </marker>
                 </pattern>
-                <message>Cal substituir els guionets. 
<suggestion>—</suggestion>.</message>
+                <message>Cal substituir els guionets per guió: 
<suggestion>—</suggestion>.</message>
                 <short>Cal substituir els guionets</short>
-                <example type="incorrect"><marker>--</marker>
-                </example>
-                <example type="correct"><marker>—</marker>
-                </example>
+                <example type="incorrect"><marker>--</marker></example>
+                <example type="correct"><marker>—</marker></example>
             </rule>
         </rulegroup>
         <!--  <rulegroup id="apostrof_cometes" name="Apòstrof i cometes">

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
LogMeIn Rescue: Anywhere, Anytime Remote support for IT. Free Trial
Remotely access PCs and mobile devices and provide instant support
Improve your efficiency, and focus on delivering more value-add services
Discover what IT Professionals Know. Rescue delivers
http://p.sf.net/sfu/logmein_12329d2d
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits

Reply via email to