Revision: 7367
http://languagetool.svn.sourceforge.net/languagetool/?rev=7367&view=rev
Author: dnaber
Date: 2012-06-16 16:22:02 +0000 (Sat, 16 Jun 2012)
Log Message:
-----------
OOo/LO: fixed false alarm about word being uppercase when the previous sentence
ended with a footnote - Sourceforge bug #3534637
Modified Paths:
--------------
trunk/JLanguageTool/CHANGES.txt
trunk/JLanguageTool/src/resource/segment.srx
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/SRXSentenceTokenizerTest.java
Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt 2012-06-16 14:30:27 UTC (rev 7366)
+++ trunk/JLanguageTool/CHANGES.txt 2012-06-16 16:22:02 UTC (rev 7367)
@@ -65,6 +65,9 @@
-LibreOffice / OpenOffice integration: SingletonFactory now implements
XServiceInfo (Stephan Bergmann) - Sourceforge bug #3526635
+
+ -LibreOffice / OpenOffice integration: Fixed false alarm about word being
+ uppercase when the previous sentence ended with a footnote - Sourceforge bug
#3534637
-LanguageTool supports now separate rules for different local variants of a
language,
for example American English and British English. To use them from the
command line,
Modified: trunk/JLanguageTool/src/resource/segment.srx
===================================================================
--- trunk/JLanguageTool/src/resource/segment.srx 2012-06-16 14:30:27 UTC
(rev 7366)
+++ trunk/JLanguageTool/src/resource/segment.srx 2012-06-16 16:22:02 UTC
(rev 7367)
@@ -4504,7 +4504,7 @@
</rule>
<!-- Break rules -->
<rule break="yes">
-<beforebreak>[\.!?…]['|"|«|\)|\]|\}]?\s+</beforebreak>
+<beforebreak>[\.!?…][\u0002|'|"|«|\)|\]|\}]?\s+</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
@@ -4657,7 +4657,7 @@
</rule>
<!-- Break rules -->
<rule break="yes">
-<beforebreak>[\.!?…]['|"|«|\)|\]|\}]?\s+</beforebreak>
+<beforebreak>[\.!?…][\u0002|'|"|«|\)|\]|\}]?\s+</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
@@ -4685,7 +4685,7 @@
</rule>
<!-- Break rules -->
<rule break="yes">
-<beforebreak>[\.!?…]['|"|«|\)|\]|\}]?\s+</beforebreak>
+<beforebreak>[\.!?…][\u0002|'|"|«|\)|\]|\}]?\s+</beforebreak>
<afterbreak></afterbreak>
</rule>
<rule break="yes">
Modified:
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/SRXSentenceTokenizerTest.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/SRXSentenceTokenizerTest.java
2012-06-16 14:30:27 UTC (rev 7366)
+++
trunk/JLanguageTool/src/test/org/languagetool/tokenizers/SRXSentenceTokenizerTest.java
2012-06-16 16:22:02 UTC (rev 7367)
@@ -19,6 +19,7 @@
package org.languagetool.tokenizers;
import junit.framework.TestCase;
+import org.languagetool.Language;
import org.languagetool.TestTools;
/**
@@ -30,7 +31,7 @@
private SentenceTokenizer stokenizer = new SRXSentenceTokenizer("en");
// accept only \n\n as paragraph:
private SentenceTokenizer stokenizer2 = new SRXSentenceTokenizer("en");
-
+
public void setUp() {
stokenizer.setSingleLineBreaksMarksParagraph(true);
stokenizer2.setSingleLineBreaksMarksParagraph(false);
@@ -99,8 +100,26 @@
testSplit(new String[] { "It works [really!]. ", "No doubt." });
testSplit(new String[] { "It really(!) works well." });
testSplit(new String[] { "It really[!] works well." });
+
+ testSplit(new String[] { "This is a sentence.\u0002 ", "And this is
another one." }); // footnotes in LibOO/OOo look like this
}
+ public void testOfficeFootnoteTokenize() {
+ for (Language language : Language.REAL_LANGUAGES) {
+ if (language.getSentenceTokenizer().getClass() !=
SRXSentenceTokenizer.class) {
+ continue;
+ }
+ if (language == Language.KHMER || language == Language.MALAYALAM ||
language.getShortName().equals("pt")) {
+ // TODO: I don't know about these...
+ continue;
+ }
+ final String input = "A sentence.\u0002 And another one.";
+ final SentenceTokenizer tokenizer = new
SRXSentenceTokenizer(language.getShortName());
+ assertEquals("Sentence not split correctly for " + language + ": '" +
input + "'",
+ "[A sentence.\u0002 , And another one.]",
tokenizer.tokenize(input).toString());
+ }
+ }
+
private void testSplit(String[] sentences) {
TestTools.testSplit(sentences, stokenizer);
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs