Revision: 8503
http://languagetool.svn.sourceforge.net/languagetool/?rev=8503&view=rev
Author: dnaber
Date: 2012-12-04 18:05:45 +0000 (Tue, 04 Dec 2012)
Log Message:
-----------
[en] use the same word tokenizer as the other European languages again, which
avoids code duplication
Modified Paths:
--------------
trunk/JLanguageTool/CHANGES.txt
trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/en/EnglishWordTokenizer.java
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt 2012-12-04 17:45:07 UTC (rev 8502)
+++ trunk/JLanguageTool/CHANGES.txt 2012-12-04 18:05:45 UTC (rev 8503)
@@ -31,6 +31,9 @@
-several new rules
-updated POS dictionary (fix wrong POS Tags)
+ -English:
+ -uses the same word tokenizer again as other European languages
+
-OpenOffice/LibreOffice: Fixed ConcurrentModificationException (Sourceforge
bug #3572536)
-API: Language.getLanguageForShortName() now consistently throws an exception
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java
2012-12-04 17:45:07 UTC (rev 8502)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/language/English.java
2012-12-04 18:05:45 UTC (rev 8503)
@@ -43,7 +43,6 @@
import org.languagetool.tokenizers.SRXSentenceTokenizer;
import org.languagetool.tokenizers.SentenceTokenizer;
import org.languagetool.tokenizers.Tokenizer;
-import org.languagetool.tokenizers.en.EnglishWordTokenizer;
public class English extends Language {
@@ -95,14 +94,6 @@
}
@Override
- public final Tokenizer getWordTokenizer() {
- if (wordTokenizer == null) {
- wordTokenizer = new EnglishWordTokenizer();
- }
- return wordTokenizer;
- }
-
- @Override
public final Synthesizer getSynthesizer() {
if (synthesizer == null) {
synthesizer = new EnglishSynthesizer();
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/en/EnglishWordTokenizer.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/en/EnglishWordTokenizer.java
2012-12-04 17:45:07 UTC (rev 8502)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/tokenizers/en/EnglishWordTokenizer.java
2012-12-04 18:05:45 UTC (rev 8503)
@@ -28,6 +28,7 @@
* Tokenizes a sentence into words. Punctuation and whitespace gets its own
token.
*
* @author Daniel Naber
+ * @deprecated use {@link org.languagetool.tokenizers.WordTokenizer} instead
(deprecated since 2.0)
*/
public class EnglishWordTokenizer implements Tokenizer {
Modified:
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
===================================================================
---
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
2012-12-04 17:45:07 UTC (rev 8502)
+++
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
2012-12-04 18:05:45 UTC (rev 8503)
@@ -8321,9 +8321,11 @@
</rule>
<rule id="WRONG_APOSTROPHE" name="shouldn´t">
<pattern>
- <token
regexp="yes">(ain|aren|can|couldn|didn|doesn|don|hadn|hasn|haven|isn|mayn|mightn|mustn|needn|oughtn|shan|shouldn|wasn|weren|won|wouldn)(´|`)t</token>
+ <token
regexp="yes">(ain|aren|can|couldn|didn|doesn|don|hadn|hasn|haven|isn|mayn|mightn|mustn|needn|oughtn|shan|shouldn|wasn|weren|won|wouldn)</token>
+ <token regexp="yes">(´|`)</token>
+ <token>t</token>
</pattern>
- <message>You used an accent character instead of an apostrophe.
Please use either <suggestion><match no="1" regexp_match="´|`"
regexp_replace="'"></match></suggestion> or <suggestion><match no="1"
regexp_match="´|`" regexp_replace="’"></match></suggestion>.</message>
+ <message>You used an accent character instead of an apostrophe.
Please use either <suggestion>\1'\3</suggestion> or
<suggestion>\1’\3</suggestion>.</message>
<short>Accent character misused as apostrophe</short>
<example correction="shouldn't|shouldn’t" type="incorrect">You
<marker>shouldn´t</marker> do it!</example>
<example type="correct">You shouldn't do it!</example>
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
LogMeIn Rescue: Anywhere, Anytime Remote support for IT. Free Trial
Remotely access PCs and mobile devices and provide instant support
Improve your efficiency, and focus on delivering more value-add services
Discover what IT Professionals Know. Rescue delivers
http://p.sf.net/sfu/logmein_12329d2d
_______________________________________________
Languagetool-commits mailing list
Languagetool-commits@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-commits