Revision: 7343
http://languagetool.svn.sourceforge.net/languagetool/?rev=7343&view=rev
Author: dnaber
Date: 2012-06-14 20:37:31 +0000 (Thu, 14 Jun 2012)
Log Message:
-----------
adding workaround to make rule creator work with Chinese
Modified Paths:
--------------
trunk/ltcommunity/grails-app/services/org/languagetool/PatternStringConverterService.groovy
Modified:
trunk/ltcommunity/grails-app/services/org/languagetool/PatternStringConverterService.groovy
===================================================================
--- trunk/ltcommunity/grails-app/services/org/languagetool/PatternStringConverterService.groovy	2012-06-14 19:12:10 UTC (rev 7342)
+++ trunk/ltcommunity/grails-app/services/org/languagetool/PatternStringConverterService.groovy	2012-06-14 20:37:31 UTC (rev 7343)
@@ -28,7 +28,7 @@
static transactional = true
def convertToPatternRule(String patternString, Language lang) {
- List patternParts = getPatternParts(lang, patternString)
+ List patternParts = getPatternParts(patternString, lang)
List elements = []
for (patternPart in patternParts) {
boolean isRegex = isRegex(patternPart)
@@ -44,27 +44,31 @@
return patternPart.find("[.|+*?\\[\\]]") != null
}
- private List getPatternParts(Language lang, String patternString) {
+ private List getPatternParts(String patternString, Language lang) {
// First split at whitespace, then properly tokenize unless it's a regex. Only this way we will
// properly tokenize "don't" but don't tokenize a regex like "foob.r":
List simpleParts = patternString.split("\\s+")
def tokenizer = lang.getWordTokenizer()
List patternParts = []
- for (simplePart in simpleParts) {
+ for (String simplePart in simpleParts) {
if (isRegex(simplePart)) {
patternParts.add(simplePart)
} else {
- patternParts.addAll(getTokens(tokenizer, simplePart))
+ patternParts.addAll(getTokens(tokenizer, simplePart, lang))
}
}
return patternParts
}
- private List getTokens(Tokenizer tokenizer, simplePart) {
+ private List getTokens(Tokenizer tokenizer, String simplePart, Language lang) {
List tokens = []
List patternPartsWithWhitespace = tokenizer.tokenize(simplePart)
for (patternPart in patternPartsWithWhitespace) {
if (!patternPart.trim().isEmpty()) {
+ if (lang.getShortName().equals(Language.CHINESE.getShortName())) {
+ // for some reason, tokens end with "/v" etc. in Chinese, cut that off:
+ patternPart = patternPart.replaceFirst("/.*", "")
+ }
tokens.add(patternPart)
}
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs