Revision: 6741
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=6741&view=rev
Author:   dnaber
Date:     2012-04-17 21:45:39 +0000 (Tue, 17 Apr 2012)
Log Message:
-----------
Improved startup speed by using SAX parser and new segment library (Jarek Lipski)

Modified Paths:
--------------
    trunk/JLanguageTool/CHANGES.txt
    trunk/JLanguageTool/build.properties
    trunk/JLanguageTool/src/java/org/languagetool/tokenizers/SRXSentenceTokenizer.java

Added Paths:
-----------
    trunk/JLanguageTool/libs/segment-1.3.8.jar

Removed Paths:
-------------
    trunk/JLanguageTool/libs/segment-1.3.0.jar

Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt     2012-04-17 00:19:26 UTC (rev 6740)
+++ trunk/JLanguageTool/CHANGES.txt     2012-04-17 21:45:39 UTC (rev 6741)
@@ -10,7 +10,9 @@
 
  -GUI: made the result of "Tag Text" more readable
 
+ -Improved startup speed (Jarek Lipski)
 
+
 1.7 (2012-03-25)
 
  -English

Modified: trunk/JLanguageTool/build.properties
===================================================================
--- trunk/JLanguageTool/build.properties        2012-04-17 00:19:26 UTC (rev 6740)
+++ trunk/JLanguageTool/build.properties        2012-04-17 21:45:39 UTC (rev 6741)
@@ -29,7 +29,7 @@
 ext.morfologik.fsa.lib = ${ext.dir}/${morfologik.fsa.lib}
 ext.morfologik.stemming.lib = ${ext.dir}/${morfologik.stemming.lib}
 ext.jwordsplitter.lib = ${ext.dir}/jWordSplitter.jar
-segment.lib = segment-1.3.0.jar
+segment.lib = segment-1.3.8.jar
 ext.segment.lib = ${ext.dir}/${segment.lib}
 logging.lib = commons-logging-1.1.1.jar
 ext.logging.lib = ${ext.dir}/${logging.lib}

Deleted: trunk/JLanguageTool/libs/segment-1.3.0.jar
===================================================================
(Binary files differ)

Added: trunk/JLanguageTool/libs/segment-1.3.8.jar
===================================================================
(Binary files differ)


Property changes on: trunk/JLanguageTool/libs/segment-1.3.8.jar
___________________________________________________________________
Added: svn:mime-type
   + application/octet-stream

Modified: trunk/JLanguageTool/src/java/org/languagetool/tokenizers/SRXSentenceTokenizer.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/tokenizers/SRXSentenceTokenizer.java  2012-04-17 00:19:26 UTC (rev 6740)
+++ trunk/JLanguageTool/src/java/org/languagetool/tokenizers/SRXSentenceTokenizer.java  2012-04-17 21:45:39 UTC (rev 6741)
@@ -19,43 +19,65 @@
 package org.languagetool.tokenizers;
 
 import java.io.BufferedReader;
+import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import net.sourceforge.segment.TextIterator;
 import net.sourceforge.segment.srx.SrxDocument;
 import net.sourceforge.segment.srx.SrxParser;
 import net.sourceforge.segment.srx.SrxTextIterator;
-import net.sourceforge.segment.srx.io.Srx2Parser;
+import net.sourceforge.segment.srx.io.Srx2SaxParser;
+
 import org.languagetool.JLanguageTool;
 
 /**
  * Class to tokenize sentences using an SRX file.
  * 
  * @author Marcin Miłkowski
- * 
+ * @author Jarek Lipski
  */
 public class SRXSentenceTokenizer extends SentenceTokenizer {
-
-  private final BufferedReader srxReader;
-  private final SrxDocument document;
+       
   private final String language;
   private String parCode;
 
-  static final String RULES = "/segment.srx";
+  private static final String RULES = "/segment.srx";
+  
+  private static final SrxDocument document = createSrxDocument();
 
-  public SRXSentenceTokenizer(final String language) {
-    this.language = language;
+  private static SrxDocument createSrxDocument() {
+    BufferedReader srxReader = null; 
     try {
       srxReader = new BufferedReader(new InputStreamReader(
-                 JLanguageTool.getDataBroker().getFromResourceDirAsStream(RULES), "utf-8"));
-    } catch (Exception e) {
+          JLanguageTool.getDataBroker().getFromResourceDirAsStream(RULES), "utf-8"));
+      
+      Map<String, Object> parserParameters = new HashMap<String, Object>();
+      parserParameters.put(Srx2SaxParser.VALIDATE_PARAMETER, true);
+      SrxParser srxParser = new Srx2SaxParser(parserParameters);
+      
+      SrxDocument document = srxParser.parse(srxReader);
+      return document;
+    } catch (IOException e) {
       throw new RuntimeException("Could not load rules " + RULES + " from resource dir "
-         + JLanguageTool.getDataBroker().getResourceDir(), e);
+          + JLanguageTool.getDataBroker().getResourceDir(), e);
+    } finally {
+      if (srxReader != null) {
+        try {
+          srxReader.close();
+        } catch (IOException e) {
+          // can't throw exception in final block, so logging an error.
+          System.err.println("Error closing SRX file reader.");
+        }
+      }
     }
-    final SrxParser srxParser = new Srx2Parser();
-    document = srxParser.parse(srxReader);
+  }
+  
+  public SRXSentenceTokenizer(final String language) {
+    this.language = language;
     setSingleLineBreaksMarksParagraph(false);
   }
 
@@ -91,12 +113,4 @@
     }
   }
 
-  @Override
-  protected final void finalize() throws Throwable {
-    if (srxReader != null) {
-      srxReader.close();
-    }
-    super.finalize();
-  }
-
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Better than sec? Nothing is better than sec when it comes to
monitoring Big Data applications. Try Boundary one-second 
resolution app monitoring today. Free.
http://p.sf.net/sfu/Boundary-dev2dev
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to