Revision: 9204
http://languagetool.svn.sourceforge.net/languagetool/?rev=9204&view=rev
Author: jaumeortola
Date: 2013-01-24 20:19:20 +0000 (Thu, 24 Jan 2013)
Log Message:
-----------
[ca] Improved Catalan sentence tokenization.
Modified Paths:
--------------
trunk/languagetool/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx
trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/tokenizers/ca/CatalanSentenceTokenizerTest.java
Modified: trunk/languagetool/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx
===================================================================
--- trunk/languagetool/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx	2013-01-24 14:36:18 UTC (rev 9203)
+++ trunk/languagetool/languagetool-core/src/main/resources/org/languagetool/resource/segment.srx	2013-01-24 20:19:20 UTC (rev 9204)
@@ -4258,6 +4258,10 @@
<afterbreak></afterbreak>
</rule>
<!-- Abbreviations that can finish sentences -->
+<rule break="no"> <!-- segle XIX, s. XIX -->
+<beforebreak>\bs\.\s</beforebreak>
+<afterbreak>[XIV]+\b</afterbreak>
+</rule>
<rule break="no">
<beforebreak>\b(u|s|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*\s</beforebreak>
<afterbreak>\p{Ll}</afterbreak>
Modified: trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/tokenizers/ca/CatalanSentenceTokenizerTest.java
===================================================================
--- trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/tokenizers/ca/CatalanSentenceTokenizerTest.java	2013-01-24 14:36:18 UTC (rev 9203)
+++ trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/tokenizers/ca/CatalanSentenceTokenizerTest.java	2013-01-24 20:19:20 UTC (rev 9204)
@@ -61,6 +61,7 @@
testSplit(new String[] { "El Dr. Joan no vindrà." });
testSplit(new String[] { "Distingit Sr. Joan," });
testSplit(new String[] { "Molt Hble. Sr. President" });
+ testSplit(new String[] { "de Sant Nicolau (del s. XII; cor gòtic del s. XIV) i de Sant " });
// Exception to abbreviations
testSplit(new String[] { "Ell és el número u. ", "Jo el dos." });
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Master Visual Studio, SharePoint, SQL, ASP.NET, C# 2012, HTML5, CSS,
MVC, Windows 8 Apps, JavaScript and much more. Keep your skills current
with LearnDevNow - 3,200 step-by-step video tutorials by Microsoft
MVPs and experts. ON SALE this month only -- learn more at:
http://p.sf.net/sfu/learnnow-d2d
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits