Author: pkluegl
Date: Fri Feb 8 11:39:00 2013
New Revision: 1443947

URL: http://svn.apache.org/r1443947
Log:
UIMA-2647 - applied patch for loading word lists in jars - adapted workbench commands for creating tries
Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java?rev=1443947&r1=1443946&r2=1443947&view=diff ============================================================================== --- uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java (original) +++ uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java Fri Feb 8 11:39:00 2013 @@ -20,6 +20,8 @@ package org.apache.uima.textmarker; import java.io.File; +import java.io.IOException; +import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -201,18 +203,42 @@ public class TextMarkerEnvironment { TextMarkerWordList result = wordLists.get(list); if (result == null) { boolean found = false; - for (String eachPath : resourcePaths) { - File file = new File(eachPath, list); - if (!file.exists()) { - continue; + if (resourcePaths != null) { + for (String eachPath : resourcePaths) { + File file = new 
File(eachPath, list); + if (!file.exists()) { + continue; + } + found = true; + try { + if (file.getName().endsWith("mtwl")) { + wordLists.put(list, new MultiTreeWordList(file.getAbsolutePath())); + } else { + wordLists.put(list, new TreeWordList(file.getAbsolutePath())); + } + } catch (IOException e) { + Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, + "Error reading word list", e); + found = false; + } + break; } - found = true; - if (file.getName().endsWith("mtwl")) { - wordLists.put(list, new MultiTreeWordList(file.getAbsolutePath())); - } else { - wordLists.put(list, new TreeWordList(file.getAbsolutePath())); + } + if (!found) { + InputStream stream = ClassLoader.getSystemResourceAsStream(list); + if (stream != null) { + found = true; + try { + if (list.endsWith(".mtwl")) + wordLists.put(list, new MultiTreeWordList(stream, list)); + else + wordLists.put(list, new TreeWordList(stream, list)); + } catch (IOException e) { + Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, + "Error reading word list from classpath", e); + found = false; + } } - break; } if (!found) { Logger.getLogger(this.getClass().getName()).log(Level.SEVERE, "Can't find " + list + "!"); @@ -253,7 +279,7 @@ public class TextMarkerEnvironment { private Object getInitialValue(String name, Class<?> type) { Object init = initializedVariables.get(name); if (init != null) { - if(init instanceof List) { + if (init instanceof List) { ArrayList<Object> list = new ArrayList<Object>(); list.addAll((Collection<? extends Object>) init); return list; @@ -407,7 +433,7 @@ public class TextMarkerEnvironment { @SuppressWarnings("unchecked") public void setInitialVariableValue(String var, Object value) { if (ownsVariable(var)) { - if(value instanceof List) { + if (value instanceof List) { List<Object> initValue = new ArrayList<Object>(); initValue.addAll((Collection<? 
extends Object>) value); initializedVariables.put(var, initValue); Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java?rev=1443947&r1=1443946&r2=1443947&view=diff ============================================================================== --- uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java (original) +++ uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordList.java Fri Feb 8 11:39:00 2013 @@ -22,8 +22,8 @@ package org.apache.uima.textmarker.resou import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collection; @@ -62,7 +62,7 @@ public class MultiTreeWordList implement /** * Default constructor. */ - public MultiTreeWordList() { + public MultiTreeWordList() throws IOException { this(new String[] {}); } @@ -72,32 +72,26 @@ public class MultiTreeWordList implement * @param pathname * the pathname of the used file. */ - public MultiTreeWordList(String pathname) { + public MultiTreeWordList(String pathname) throws IOException { + this(new String[] { pathname }); + } + /** + * Constructor from an open stream. This method will close the stream. + * + * @param stream + * the stream to read the file from. 
+ * @param name + * associated name + */ + public MultiTreeWordList(InputStream stream, String name) throws IOException { this.root = new MultiTextNode(); this.costMap = new EditDistanceCostMap(); - File directory = new File(pathname); - - if (!directory.isDirectory()) { - if (directory.getName().endsWith(".txt")) { - buildNewTree(directory.getAbsolutePath()); - } - if (directory.getName().endsWith(".mtwl")) { - persistence.readMTWL(root, directory.getAbsolutePath()); - } - return; - } - - File[] listFiles = directory.listFiles(); - for (File data : listFiles) { - if (data.getName().endsWith(".txt")) { - buildNewTree(data.getAbsolutePath()); - } - if (data.getName().endsWith(".mtwl")) { - persistence.readMTWL(root, data.getAbsolutePath()); - } - } + if (name.endsWith(".mtwl")) + persistence.readMTWL(root, stream, ENCODING); + if (name.endsWith(".txt")) + buildNewTree(stream, name); } /** @@ -106,18 +100,31 @@ public class MultiTreeWordList implement * @param filename * path of the file to create a TextWordList from */ - public MultiTreeWordList(String[] pathnames) { - + public MultiTreeWordList(String[] pathnames) throws IOException { this.root = new MultiTextNode(); this.costMap = new EditDistanceCostMap(); for (String pathname : pathnames) { + File directory = new File(pathname); - if (pathname.endsWith(".mtwl")) { - persistence.readMTWL(root, pathname); - } - if (pathname.endsWith(".txt")) { - buildNewTree(pathname); + if (!directory.isDirectory()) { + if (directory.getName().endsWith(".txt")) { + buildNewTree(new FileInputStream(pathname), directory.getName()); + } + if (directory.getName().endsWith(".mtwl")) { + persistence.readMTWL(root, directory.getAbsolutePath()); + } + } else { + File[] listFiles = directory.listFiles(); + + for (File data : listFiles) { + if (data.getName().endsWith(".txt")) { + buildNewTree(new FileInputStream(data.getAbsolutePath()), data.getName()); + } + if (data.getName().endsWith(".mtwl")) { + persistence.readMTWL(root, 
data.getAbsolutePath()); + } + } } } } @@ -125,27 +132,20 @@ public class MultiTreeWordList implement /** * Creates a new Tree in the existing treeWordList from a file with path pathname * - * @param pathname - * Absolut path of the file containing the word for the treeWordList - */ - public void buildNewTree(String pathname) { + * @param stream + * Input stream for the file containing the words for the treeWordList + * @param name + * Associated name for the file + */ + public void buildNewTree(InputStream stream, String name) throws IOException { + BufferedReader br = new BufferedReader(new InputStreamReader(stream, ENCODING)); + String s = null; - try { - File f = new File(pathname); - FileInputStream fstream = new FileInputStream(f); - BufferedReader br = new BufferedReader(new InputStreamReader(fstream, ENCODING)); - String s = null; - - while ((s = br.readLine()) != null) { - addWord(s.trim(), f.getName()); - } - fstream.close(); - br.close(); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); + while ((s = br.readLine()) != null) { + addWord(s.trim(), name); } + stream.close(); + br.close(); } /** Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java?rev=1443947&r1=1443946&r2=1443947&view=diff ============================================================================== --- uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java (original) +++ uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/MultiTreeWordListPersistence.java Fri Feb 8 11:39:00 2013 @@ -22,6 +22,7 @@ package org.apache.uima.textmarker.resou import 
java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Writer; @@ -40,17 +41,16 @@ public class MultiTreeWordListPersistenc * * Reads the XML-File with the specified path and creates a TreeWordList. * - * @param path - * The location of the XML-File. + * @param stream + * The open XML-File containing the TreeWordList. This method will close the stream. */ - public void readMTWL(MultiTextNode root, String path) { - readMTWL(root, path, "UTF-8"); + public void readMTWL(MultiTextNode root, String path) throws IOException { + readMTWL(root, new FileInputStream(path), "UTF-8"); } - public void readMTWL(MultiTextNode root, String path, String encoding) { + public void readMTWL(MultiTextNode root, InputStream stream, String encoding) throws IOException { try { - FileInputStream input = new FileInputStream(path); - InputStreamReader stream = new InputStreamReader(input, encoding); + InputStreamReader streamReader = new InputStreamReader(stream, encoding); TrieXMLEventHandler handler = new TrieXMLEventHandler(root); SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); SAXParser saxParser = saxParserFactory.newSAXParser(); @@ -59,9 +59,7 @@ public class MultiTreeWordListPersistenc // XMLReader reader = XMLReaderFactory.createXMLReader(); reader.setContentHandler(handler); reader.setErrorHandler(handler); - reader.parse(new InputSource(stream)); - } catch (IOException e) { - e.printStackTrace(); + reader.parse(new InputSource(streamReader)); } catch (SAXException e) { e.printStackTrace(); } catch (ParserConfigurationException e) { Modified: uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java?rev=1443947&r1=1443946&r2=1443947&view=diff ============================================================================== --- uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java (original) +++ uima/sandbox/textmarker/trunk/textmarker-core/src/main/java/org/apache/uima/textmarker/resource/TreeWordList.java Fri Feb 8 11:39:00 2013 @@ -24,6 +24,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; import java.io.Writer; @@ -65,16 +66,37 @@ public class TreeWordList implements Tex * @param filename * path of the file to create a TextWordList from */ - public TreeWordList(String pathname) { + public TreeWordList(String pathname) throws IOException { if (pathname.endsWith(".twl")) { - readXML(pathname, "UTF-8"); + File f = new File(pathname); + FileInputStream fstream = new FileInputStream(f); + readXML(fstream, "UTF-8"); } if (pathname.endsWith(".txt")) { - buildNewTree(pathname); + // reading the file + File f = new File(pathname); + FileInputStream fstream = new FileInputStream(f); + buildNewTree(fstream); } this.name = new File(pathname).getName(); } + /** + * Constructs a TreeWordList from an open stream with a given name + * + * @param stream + * path of the file to create a TextWordList from + */ + public TreeWordList(InputStream stream, String name) throws IOException { + if (name.endsWith(".twl")) { + readXML(stream, "UTF-8"); + } + if (name.endsWith(".txt")) { + buildNewTree(stream); + } + this.name = new File(name).getName(); + } + public TreeWordList(List<String> data) { buildNewTree(data); name = "local"; @@ -90,33 +112,23 @@ public class TreeWordList implements Tex /** * Creates 
a new Tree in the existing treeWordList from a file with path pathname * - * @param pathname - * Absolut path of the file containing the word for the treeWordList + * @param stream + * Open InputStream containing the word for the treeWordList, this method will close the stream. */ - public void buildNewTree(String pathname) { - try { - // reading the file - File f = new File(pathname); - FileInputStream fstream = new FileInputStream(f); - Scanner scan = new Scanner(fstream, "UTF-8"); + public void buildNewTree(InputStream stream) throws IOException { + Scanner scan = new Scanner(stream, "UTF-8"); // creating a new tree this.root = new TextNode(); while (scan.hasNextLine()) { String s = scan.nextLine().trim(); - + if (s.endsWith("=")) { s = s.substring(0, s.length() - 1); s = s.trim(); } addWord(s); } - fstream.close(); - - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } + scan.close(); } /** @@ -296,10 +308,9 @@ public class TreeWordList implements Tex } } - public void readXML(String path, String encoding) { + public void readXML(InputStream stream, String encoding) throws IOException { try { - FileInputStream input = new FileInputStream(path); - InputStreamReader stream = new InputStreamReader(input, encoding); + InputStreamReader streamReader = new InputStreamReader(stream, encoding); this.root = new TextNode(); XMLEventHandler handler = new XMLEventHandler(root); SAXParserFactory factory = SAXParserFactory.newInstance(); @@ -308,7 +319,7 @@ public class TreeWordList implements Tex // XMLReader reader = XMLReaderFactory.createXMLReader(); reader.setContentHandler(handler); reader.setErrorHandler(handler); - reader.parse(new InputSource(stream)); + reader.parse(new InputSource(streamReader)); } catch (SAXParseException spe) { StringBuffer sb = new StringBuffer(spe.toString()); sb.append("\n Line number: " + spe.getLineNumber()); @@ -319,8 +330,6 @@ public class TreeWordList implements Tex } catch 
(SAXException se) { System.out.println("loadDOM threw " + se); se.printStackTrace(System.out); - } catch (IOException e) { - e.printStackTrace(); } catch (ParserConfigurationException e) { e.printStackTrace(); } Modified: uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java?rev=1443947&r1=1443946&r2=1443947&view=diff ============================================================================== --- uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java (original) +++ uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/MultiTWLConverterHandler.java Fri Feb 8 11:39:00 2013 @@ -20,10 +20,12 @@ package org.apache.uima.textmarker.utils.twl; import java.io.File; +import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import org.apache.uima.textmarker.addons.TextMarkerAddonsPlugin; import org.apache.uima.textmarker.resource.MultiTreeWordList; import org.eclipse.core.commands.ExecutionEvent; import org.eclipse.core.commands.ExecutionException; @@ -80,7 +82,13 @@ public class MultiTWLConverterHandler im } monitor.beginTask("Compiling generated.mtwl...", 1); if (!paths.isEmpty()) { - MultiTreeWordList trie = new MultiTreeWordList(paths.toArray(new String[0])); + MultiTreeWordList trie; + try { + trie = new MultiTreeWordList(paths.toArray(new String[0])); + } catch (IOException e) { + TextMarkerAddonsPlugin.error(e); + return Status.CANCEL_STATUS; + } IPath parent = first.getLocation().removeLastSegments(1); IPath newPath = parent.append("generated.mtwl"); @@ -92,8 +100,8 @@ public class MultiTWLConverterHandler im IWorkspaceRoot myWorkspaceRoot = 
ResourcesPlugin.getWorkspace().getRoot(); IContainer container = myWorkspaceRoot.getContainerForLocation(parent); - final String localPath = container.getProjectRelativePath() + "/" + container.getName() - + "/" + "generated.mtwl"; + // final String localPath = container.getProjectRelativePath() + "/" + container.getName() + // + "/" + "generated.mtwl"; try { container.getParent().refreshLocal(IResource.DEPTH_INFINITE, new NullProgressMonitor()); } catch (CoreException e) { Modified: uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java URL: http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java?rev=1443947&r1=1443946&r2=1443947&view=diff ============================================================================== --- uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java (original) +++ uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/utils/twl/TWLConverterHandler.java Fri Feb 8 11:39:00 2013 @@ -19,10 +19,12 @@ package org.apache.uima.textmarker.utils.twl; +import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import org.apache.uima.textmarker.addons.TextMarkerAddonsPlugin; import org.apache.uima.textmarker.resource.TreeWordList; import org.eclipse.core.commands.ExecutionEvent; import org.eclipse.core.commands.ExecutionException; @@ -60,7 +62,7 @@ public class TWLConverterHandler impleme if (HandlerUtil.getCurrentSelection(event) instanceof IStructuredSelection) { StructuredSelection selection = (StructuredSelection) HandlerUtil .getCurrentSelection(event); - Iterator<Object> iter = selection.iterator(); + Iterator<?> iter = selection.iterator(); while (iter.hasNext()) { Object object = iter.next(); if (object instanceof 
IFile) { @@ -73,7 +75,13 @@ public class TWLConverterHandler impleme for (IFile file : files) { monitor.setTaskName("Compiling " + file.getLocation().lastSegment() + "..."); String path = file.getRawLocation().toString(); - TreeWordList list = new TreeWordList(path); + TreeWordList list; + try { + list = new TreeWordList(path); + } catch (IOException e) { + TextMarkerAddonsPlugin.error(e); + return Status.CANCEL_STATUS; + } String exportPath = path.substring(0, path.length() - 3) + "twl"; list.createXMLFile(exportPath, "UTF-8"); IWorkspaceRoot myWorkspaceRoot = ResourcesPlugin.getWorkspace().getRoot();