Revision: 7247 http://languagetool.svn.sourceforge.net/languagetool/?rev=7247&view=rev Author: dnaber Date: 2012-06-05 19:14:52 +0000 (Tue, 05 Jun 2012) Log Message: ----------- lazy init for hunspell rule
Modified Paths: -------------- trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java 2012-06-05 18:44:46 UTC (rev 7246) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java 2012-06-05 19:14:52 UTC (rev 7247) @@ -33,27 +33,27 @@ * you should simply create a subclass of this class. * * @author Marcin Miłkowski - * */ public abstract class SpellingCheckRule extends Rule { + protected final Language language; + @Override - public abstract String getId(); - - + public abstract String getId(); + public SpellingCheckRule(final ResourceBundle messages, final Language language) { - super(messages); + super(messages); + this.language = language; } - @Override public abstract String getDescription(); + @Override public abstract RuleMatch[] match(AnalyzedSentence text) throws IOException; @Override public void reset() { - } } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java 2012-06-05 18:44:46 UTC (rev 7246) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java 2012-06-05 19:14:52 UTC (rev 7247) @@ -48,86 +48,19 @@ */ public class HunspellRule extends SpellingCheckRule { - private final Pattern nonWord; - private final static String NON_ALPHABETIC = "[^\\p{L}]"; + private final static String NON_ALPHABETIC = "[^\\p{L}]"; - /** - * The dictionary file - */ + /** The dictionary file */ private Hunspell.Dictionary dictionary = null; + private 
Pattern nonWordPattern; + private boolean needsInit = true; public HunspellRule(final ResourceBundle messages, final Language language) throws UnsatisfiedLinkError, UnsupportedOperationException, IOException { super(messages, language); super.setCategory(new Category(messages.getString("category_typo"))); + } - final String langCountry = language.getShortName() - + "_" - + language.getCountryVariants()[0]; - - final String shortDicPath = "/" - + language.getShortName() - + "/hunspell/" - + langCountry - + ".dic"; - - String wordChars = ""; - //set dictionary only if there are dictionary files - if (JLanguageTool.getDataBroker().resourceExists(shortDicPath)) { - dictionary = Hunspell.getInstance(). - getDictionary(getDictionaryPath(langCountry, shortDicPath)); - - if (!"".equals(dictionary.getWordChars())) { - wordChars = "(?![" + dictionary.getWordChars().replace("-", "\\-") + "])"; - } - } - - nonWord = Pattern.compile(wordChars + NON_ALPHABETIC); - } - - private String getDictionaryPath(final String dicName, - final String originalPath) throws IOException { - - final URL dictURL = JLanguageTool.getDataBroker().getFromResourceDirAsUrl( - originalPath); - - String dictionaryPath = dictURL.getPath(); - - //in the webstart version, we need to copy the files outside the jar - //to the local temporary directory - if ("jar".equals(dictURL.getProtocol())) { - final File tempDir = new File(System.getProperty("java.io.tmpdir")); - File temporaryFile = new File(tempDir, dicName + ".dic"); - JLanguageTool.addTemporaryFile(temporaryFile); - fileCopy(JLanguageTool.getDataBroker(). - getFromResourceDirAsStream(originalPath), temporaryFile); - temporaryFile = new File(tempDir, dicName + ".aff"); - JLanguageTool.addTemporaryFile(temporaryFile); - fileCopy(JLanguageTool.getDataBroker(). - getFromResourceDirAsStream(originalPath. 
- replaceFirst(".dic$", ".aff")), temporaryFile); - - dictionaryPath = tempDir.getAbsolutePath() + "/" + dicName; - } else { - dictionaryPath = dictionaryPath.substring(0, dictionaryPath.length() - 4); - } - return dictionaryPath; - } - - private void fileCopy(final InputStream in, final File targetFile) throws IOException { - final OutputStream out = new FileOutputStream(targetFile); - try { - final byte[] buf = new byte[1024]; - int len; - while ((len = in.read(buf)) > 0) { - out.write(buf, 0, len); - } - in.close(); - } finally { - out.close(); - } - } - @Override public String getId() { return "HUNSPELL_RULE"; @@ -140,16 +73,18 @@ @Override public RuleMatch[] match(AnalyzedSentence text) throws IOException { - final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>(); - final String[] tokens = tokenizeText(getSentenceText(text)); - - // some languages might not have a dictionary, be silent about it + final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>(); + if (needsInit) { + init(); + } if (dictionary == null) { + // some languages might not have a dictionary, be silent about it return toRuleMatchArray(ruleMatches); } - int len = text.getTokens()[1].getStartPos(); - + final String[] tokens = tokenizeText(getSentenceText(text)); + // starting with the first token to skip the zero-length START_SENT + int len = text.getTokens()[1].getStartPos(); for (final String word : tokens) { boolean isAlphabetic = true; if (word.length() == 1) { // hunspell dictionaries usually do not contain punctuation @@ -171,17 +106,82 @@ return toRuleMatchArray(ruleMatches); } - - private String getSentenceText(final AnalyzedSentence sentence) { - final StringBuilder sb = new StringBuilder(); - for (int i = 1; i < sentence.getTokens().length; i++) { - sb.append(sentence.getTokens()[i].getToken()); - } - return sb.toString(); - } - - private String[] tokenizeText(final String sentence) { - return nonWord.split(sentence); - } + private String[] tokenizeText(final String 
sentence) throws IOException { + return nonWordPattern.split(sentence); + } + + private String getSentenceText(final AnalyzedSentence sentence) { + final StringBuilder sb = new StringBuilder(); + for (int i = 1; i < sentence.getTokens().length; i++) { + sb.append(sentence.getTokens()[i].getToken()); + } + return sb.toString(); + } + + private void init() throws IOException { + final String langCountry = language.getShortName() + + "_" + + language.getCountryVariants()[0]; + final String shortDicPath = "/" + + language.getShortName() + + "/hunspell/" + + langCountry + + ".dic"; + String wordChars = ""; + // set dictionary only if there are dictionary files: + if (JLanguageTool.getDataBroker().resourceExists(shortDicPath)) { + dictionary = Hunspell.getInstance(). + getDictionary(getDictionaryPath(langCountry, shortDicPath)); + + if (!"".equals(dictionary.getWordChars())) { + wordChars = "(?![" + dictionary.getWordChars().replace("-", "\\-") + "])"; + } + } + nonWordPattern = Pattern.compile(wordChars + NON_ALPHABETIC); + needsInit = false; + } + + private String getDictionaryPath(final String dicName, + final String originalPath) throws IOException { + + final URL dictURL = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(originalPath); + String dictionaryPath = dictURL.getPath(); + + //in the webstart version, we need to copy the files outside the jar + //to the local temporary directory + if ("jar".equals(dictURL.getProtocol())) { + final File tempDir = new File(System.getProperty("java.io.tmpdir")); + File temporaryFile = new File(tempDir, dicName + ".dic"); + JLanguageTool.addTemporaryFile(temporaryFile); + fileCopy(JLanguageTool.getDataBroker(). + getFromResourceDirAsStream(originalPath), temporaryFile); + temporaryFile = new File(tempDir, dicName + ".aff"); + JLanguageTool.addTemporaryFile(temporaryFile); + fileCopy(JLanguageTool.getDataBroker(). + getFromResourceDirAsStream(originalPath. 
+ replaceFirst(".dic$", ".aff")), temporaryFile); + + dictionaryPath = tempDir.getAbsolutePath() + "/" + dicName; + } else { + final int suffixLength = ".dic".length(); + dictionaryPath = dictionaryPath.substring(0, dictionaryPath.length() - suffixLength); + } + return dictionaryPath; + } + + private void fileCopy(final InputStream in, final File targetFile) throws IOException { + final OutputStream out = new FileOutputStream(targetFile); + try { + final byte[] buf = new byte[1024]; + int len; + while ((len = in.read(buf)) > 0) { + out.write(buf, 0, len); + } + in.close(); + } finally { + out.close(); + } + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs