Author: pkluegl
Date: Fri Feb  8 11:39:00 2013
New Revision: 1443947

URL: http://svn.apache.org/r1443947
Log:
UIMA-2647
- applied patch for loading word lists in jars
- adapted workbench commands for creating tries

Modified:
    
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java
    
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java
    
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java
    
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java
    
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java
    
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java

Modified: 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java?rev=1443947&r1=1443946&r2=1443947&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java
 Fri Feb  8 11:39:00 2013
@@ -20,6 +20,8 @@
 package org.apache.uima.textmarker;
 
 import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -201,18 +203,42 @@ public class TextMarkerEnvironment {
     TextMarkerWordList result = wordLists.get(list);
     if (result == null) {
       boolean found = false;
-      for (String eachPath : resourcePaths) {
-        File file = new File(eachPath, list);
-        if (!file.exists()) {
-          continue;
+      if (resourcePaths != null) {
+        for (String eachPath : resourcePaths) {
+          File file = new File(eachPath, list);
+          if (!file.exists()) {
+            continue;
+          }
+          found = true;
+          try {
+            if (file.getName().endsWith("mtwl")) {
+              wordLists.put(list, new MultiTreeWordList(file.getAbsolutePath()));
+            } else {
+              wordLists.put(list, new TreeWordList(file.getAbsolutePath()));
+            }
+          } catch (IOException e) {
+            Logger.getLogger(this.getClass().getName()).log(Level.SEVERE,
+                    "Error reading word list", e);
+            found = false;
+          }
+          break;
         }
-        found = true;
-        if (file.getName().endsWith("mtwl")) {
-          wordLists.put(list, new MultiTreeWordList(file.getAbsolutePath()));
-        } else {
-          wordLists.put(list, new TreeWordList(file.getAbsolutePath()));
+      }
+      if (!found) {
+        InputStream stream = ClassLoader.getSystemResourceAsStream(list);
+        if (stream != null) {
+          found = true;
+          try {
+            if (list.endsWith(".mtwl"))
+              wordLists.put(list, new MultiTreeWordList(stream, list));
+            else
+              wordLists.put(list, new TreeWordList(stream, list));
+          } catch (IOException e) {
+            Logger.getLogger(this.getClass().getName()).log(Level.SEVERE,
+                    "Error reading word list from classpath", e);
+            found = false;
+          }
         }
-        break;
       }
       if (!found) {
         Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, "Can't find " + list + "!");
@@ -253,7 +279,7 @@ public class TextMarkerEnvironment {
   private Object getInitialValue(String name, Class<?> type) {
     Object init = initializedVariables.get(name);
     if (init != null) {
-      if(init instanceof List) {
+      if (init instanceof List) {
         ArrayList<Object> list = new ArrayList<Object>();
         list.addAll((Collection<? extends Object>) init);
         return list;
@@ -407,7 +433,7 @@ public class TextMarkerEnvironment {
   @SuppressWarnings("unchecked")
   public void setInitialVariableValue(String var, Object value) {
     if (ownsVariable(var)) {
-      if(value instanceof List) {
+      if (value instanceof List) {
         List<Object> initValue = new ArrayList<Object>();
         initValue.addAll((Collection<? extends Object>) value);
         initializedVariables.put(var, initValue);

Modified: 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java?rev=1443947&r1=1443946&r2=1443947&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java
 Fri Feb  8 11:39:00 2013
@@ -22,8 +22,8 @@ package org.apache.uima.textmarker.resou
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -62,7 +62,7 @@ public class MultiTreeWordList implement
   /**
    * Default constructor.
    */
-  public MultiTreeWordList() {
+  public MultiTreeWordList() throws IOException {
     this(new String[] {});
   }
 
@@ -72,32 +72,26 @@ public class MultiTreeWordList implement
    * @param pathname
    *          the pathname of the used file.
    */
-  public MultiTreeWordList(String pathname) {
+  public MultiTreeWordList(String pathname) throws IOException {
+    this(new String[] { pathname });
+  }
 
+  /**
+   * Constructor from an open stream. This method will close the stream.
+   * 
+   * @param stream
+   *          the stream to read the file from.
+   * @param name
+   *          associated name
+   */
+  public MultiTreeWordList(InputStream stream, String name) throws IOException {
     this.root = new MultiTextNode();
     this.costMap = new EditDistanceCostMap();
-    File directory = new File(pathname);
-
-    if (!directory.isDirectory()) {
-      if (directory.getName().endsWith(".txt")) {
-        buildNewTree(directory.getAbsolutePath());
-      }
-      if (directory.getName().endsWith(".mtwl")) {
-        persistence.readMTWL(root, directory.getAbsolutePath());
-      }
-      return;
-    }
-
-    File[] listFiles = directory.listFiles();
 
-    for (File data : listFiles) {
-      if (data.getName().endsWith(".txt")) {
-        buildNewTree(data.getAbsolutePath());
-      }
-      if (data.getName().endsWith(".mtwl")) {
-        persistence.readMTWL(root, data.getAbsolutePath());
-      }
-    }
+    if (name.endsWith(".mtwl"))
+      persistence.readMTWL(root, stream, ENCODING);
+    if (name.endsWith(".txt"))
+      buildNewTree(stream, name);
   }
 
   /**
@@ -106,18 +100,31 @@ public class MultiTreeWordList implement
    * @param filename
    *          path of the file to create a TextWordList from
    */
-  public MultiTreeWordList(String[] pathnames) {
-
+  public MultiTreeWordList(String[] pathnames) throws IOException {
     this.root = new MultiTextNode();
     this.costMap = new EditDistanceCostMap();
 
     for (String pathname : pathnames) {
+      File directory = new File(pathname);
 
-      if (pathname.endsWith(".mtwl")) {
-        persistence.readMTWL(root, pathname);
-      }
-      if (pathname.endsWith(".txt")) {
-        buildNewTree(pathname);
+      if (!directory.isDirectory()) {
+        if (directory.getName().endsWith(".txt")) {
+          buildNewTree(new FileInputStream(pathname), directory.getName());
+        }
+        if (directory.getName().endsWith(".mtwl")) {
+          persistence.readMTWL(root, directory.getAbsolutePath());
+        }
+      } else {
+        File[] listFiles = directory.listFiles();
+
+        for (File data : listFiles) {
+          if (data.getName().endsWith(".txt")) {
+            buildNewTree(new FileInputStream(data.getAbsolutePath()), data.getName());
+          }
+          if (data.getName().endsWith(".mtwl")) {
+            persistence.readMTWL(root, data.getAbsolutePath());
+          }
+        }
       }
     }
   }
@@ -125,27 +132,20 @@ public class MultiTreeWordList implement
   /**
    * Creates a new Tree in the existing treeWordList from a file with path 
pathname
    * 
-   * @param pathname
-   *          Absolut path of the file containing the word for the treeWordList
-   */
-  public void buildNewTree(String pathname) {
+   * @param stream
+   *          Input stream for the file containing the words for the treeWordList
+   * @param name
+   *          Associated name for the file
+   */
+  public void buildNewTree(InputStream stream, String name) throws IOException {
+    BufferedReader br = new BufferedReader(new InputStreamReader(stream, ENCODING));
+    String s = null;
 
-    try {
-      File f = new File(pathname);
-      FileInputStream fstream = new FileInputStream(f);
-      BufferedReader br = new BufferedReader(new InputStreamReader(fstream, 
ENCODING));
-      String s = null;
-
-      while ((s = br.readLine()) != null) {
-        addWord(s.trim(), f.getName());
-      }
-      fstream.close();
-      br.close();
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
+    while ((s = br.readLine()) != null) {
+      addWord(s.trim(), name);
     }
+    stream.close();
+    br.close();
   }
 
   /**

Modified: 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java?rev=1443947&r1=1443946&r2=1443947&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java
 Fri Feb  8 11:39:00 2013
@@ -22,6 +22,7 @@ package org.apache.uima.textmarker.resou
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
@@ -40,17 +41,16 @@ public class MultiTreeWordListPersistenc
    * 
    * Reads the XML-File with the specified path and creates a TreeWordList.
    * 
-   * @param path
-   *          The location of the XML-File.
+   * @param stream
+   *          The open XML-File containing the TreeWordList. This method will close the stream.
    */
-  public void readMTWL(MultiTextNode root, String path) {
-    readMTWL(root, path, "UTF-8");
+  public void readMTWL(MultiTextNode root, String path) throws IOException {
+    readMTWL(root, new FileInputStream(path), "UTF-8");
   }
 
-  public void readMTWL(MultiTextNode root, String path, String encoding) {
+  public void readMTWL(MultiTextNode root, InputStream stream, String encoding) throws IOException {
     try {
-      FileInputStream input = new FileInputStream(path);
-      InputStreamReader stream = new InputStreamReader(input, encoding);
+      InputStreamReader streamReader = new InputStreamReader(stream, encoding);
       TrieXMLEventHandler handler = new TrieXMLEventHandler(root);
       SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
       SAXParser saxParser = saxParserFactory.newSAXParser();
@@ -59,9 +59,7 @@ public class MultiTreeWordListPersistenc
       // XMLReader reader = XMLReaderFactory.createXMLReader();
       reader.setContentHandler(handler);
       reader.setErrorHandler(handler);
-      reader.parse(new InputSource(stream));
-    } catch (IOException e) {
-      e.printStackTrace();
+      reader.parse(new InputSource(streamReader));
     } catch (SAXException e) {
       e.printStackTrace();
     } catch (ParserConfigurationException e) {

Modified: 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java?rev=1443947&r1=1443946&r2=1443947&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java
 Fri Feb  8 11:39:00 2013
@@ -24,6 +24,7 @@ import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
@@ -65,16 +66,37 @@ public class TreeWordList implements Tex
    * @param filename
    *          path of the file to create a TextWordList from
    */
-  public TreeWordList(String pathname) {
+  public TreeWordList(String pathname) throws IOException {
     if (pathname.endsWith(".twl")) {
-      readXML(pathname, "UTF-8");
+      File f = new File(pathname);
+      FileInputStream fstream = new FileInputStream(f);
+      readXML(fstream, "UTF-8");
     }
     if (pathname.endsWith(".txt")) {
-      buildNewTree(pathname);
+      // reading the file
+      File f = new File(pathname);
+      FileInputStream fstream = new FileInputStream(f);
+      buildNewTree(fstream);
     }
     this.name = new File(pathname).getName();
   }
 
+  /**
+   * Constructs a TreeWordList from an open stream with a given name
+   * 
+   * @param stream
+   *          path of the file to create a TextWordList from
+   */
+  public TreeWordList(InputStream stream, String name) throws IOException {
+    if (name.endsWith(".twl")) {
+      readXML(stream, "UTF-8");
+    }
+    if (name.endsWith(".txt")) {
+      buildNewTree(stream);
+    }
+    this.name = new File(name).getName();
+  }
+
   public TreeWordList(List<String> data) {
     buildNewTree(data);
     name = "local";
@@ -90,33 +112,23 @@ public class TreeWordList implements Tex
   /**
    * Creates a new Tree in the existing treeWordList from a file with path 
pathname
    * 
-   * @param pathname
-   *          Absolut path of the file containing the word for the treeWordList
+   * @param stream
+   *          Open InputStream containing the word for the treeWordList, this method will close the stream.
    */
-  public void buildNewTree(String pathname) {
-    try {
-      // reading the file
-      File f = new File(pathname);
-      FileInputStream fstream = new FileInputStream(f);
-      Scanner scan = new Scanner(fstream, "UTF-8");
+  public void buildNewTree(InputStream stream) throws IOException {
+      Scanner scan = new Scanner(stream, "UTF-8");
       // creating a new tree
       this.root = new TextNode();
       while (scan.hasNextLine()) {
         String s = scan.nextLine().trim();
-
+    
         if (s.endsWith("=")) {
           s = s.substring(0, s.length() - 1);
           s = s.trim();
         }
         addWord(s);
       }
-      fstream.close();
-
-    } catch (FileNotFoundException e) {
-      e.printStackTrace();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
+      scan.close();
   }
 
   /**
@@ -296,10 +308,9 @@ public class TreeWordList implements Tex
     }
   }
 
-  public void readXML(String path, String encoding) {
+  public void readXML(InputStream stream, String encoding) throws IOException {
     try {
-      FileInputStream input = new FileInputStream(path);
-      InputStreamReader stream = new InputStreamReader(input, encoding);
+      InputStreamReader streamReader = new InputStreamReader(stream, encoding);
       this.root = new TextNode();
       XMLEventHandler handler = new XMLEventHandler(root);
       SAXParserFactory factory = SAXParserFactory.newInstance();
@@ -308,7 +319,7 @@ public class TreeWordList implements Tex
       // XMLReader reader = XMLReaderFactory.createXMLReader();
       reader.setContentHandler(handler);
       reader.setErrorHandler(handler);
-      reader.parse(new InputSource(stream));
+      reader.parse(new InputSource(streamReader));
     } catch (SAXParseException spe) {
       StringBuffer sb = new StringBuffer(spe.toString());
       sb.append("\n  Line number: " + spe.getLineNumber());
@@ -319,8 +330,6 @@ public class TreeWordList implements Tex
     } catch (SAXException se) {
       System.out.println("loadDOM threw " + se);
       se.printStackTrace(System.out);
-    } catch (IOException e) {
-      e.printStackTrace();
     } catch (ParserConfigurationException e) {
       e.printStackTrace();
     }

Modified: 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java?rev=1443947&r1=1443946&r2=1443947&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java
 Fri Feb  8 11:39:00 2013
@@ -20,10 +20,12 @@
 package org.apache.uima.textmarker.utils.twl;
 
 import java.io.File;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
+import org.apache.uima.textmarker.addons.TextMarkerAddonsPlugin;
 import org.apache.uima.textmarker.resource.MultiTreeWordList;
 import org.eclipse.core.commands.ExecutionEvent;
 import org.eclipse.core.commands.ExecutionException;
@@ -80,7 +82,13 @@ public class MultiTWLConverterHandler im
         }
         monitor.beginTask("Compiling generated.mtwl...", 1);
         if (!paths.isEmpty()) {
-          MultiTreeWordList trie = new MultiTreeWordList(paths.toArray(new String[0]));
+          MultiTreeWordList trie;
+          try {
+            trie = new MultiTreeWordList(paths.toArray(new String[0]));
+          } catch (IOException e) {
+            TextMarkerAddonsPlugin.error(e);
+            return Status.CANCEL_STATUS;
+          }
 
           IPath parent = first.getLocation().removeLastSegments(1);
           IPath newPath = parent.append("generated.mtwl");
@@ -92,8 +100,8 @@ public class MultiTWLConverterHandler im
 
           IWorkspaceRoot myWorkspaceRoot = ResourcesPlugin.getWorkspace().getRoot();
           IContainer container = myWorkspaceRoot.getContainerForLocation(parent);
-          final String localPath = container.getProjectRelativePath() + "/" + 
container.getName()
-                  + "/" + "generated.mtwl";
+          // final String localPath = container.getProjectRelativePath() + "/" 
+ container.getName()
+          // + "/" + "generated.mtwl";
           try {
             container.getParent().refreshLocal(IResource.DEPTH_INFINITE, new NullProgressMonitor());
           } catch (CoreException e) {

Modified: 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java?rev=1443947&r1=1443946&r2=1443947&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java
 Fri Feb  8 11:39:00 2013
@@ -19,10 +19,12 @@
 
 package org.apache.uima.textmarker.utils.twl;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
+import org.apache.uima.textmarker.addons.TextMarkerAddonsPlugin;
 import org.apache.uima.textmarker.resource.TreeWordList;
 import org.eclipse.core.commands.ExecutionEvent;
 import org.eclipse.core.commands.ExecutionException;
@@ -60,7 +62,7 @@ public class TWLConverterHandler impleme
       if (HandlerUtil.getCurrentSelection(event) instanceof IStructuredSelection) {
         StructuredSelection selection = (StructuredSelection) HandlerUtil
                 .getCurrentSelection(event);
-        Iterator<Object> iter = selection.iterator();
+        Iterator<?> iter = selection.iterator();
         while (iter.hasNext()) {
           Object object = iter.next();
           if (object instanceof IFile) {
@@ -73,7 +75,13 @@ public class TWLConverterHandler impleme
       for (IFile file : files) {
         monitor.setTaskName("Compiling " + file.getLocation().lastSegment() + "...");
         String path = file.getRawLocation().toString();
-        TreeWordList list = new TreeWordList(path);
+        TreeWordList list;
+        try {
+          list = new TreeWordList(path);
+        } catch (IOException e) {
+          TextMarkerAddonsPlugin.error(e);
+          return Status.CANCEL_STATUS;
+        }
         String exportPath = path.substring(0, path.length() - 3) + "twl";
         list.createXMLFile(exportPath, "UTF-8");
         IWorkspaceRoot myWorkspaceRoot = ResourcesPlugin.getWorkspace().getRoot();


Reply via email to