Revision: 19671 http://sourceforge.net/p/gate/code/19671 Author: markagreenwood Date: 2016-10-11 09:06:31 +0000 (Tue, 11 Oct 2016) Log Message: ----------- some more bug/performance fixes
Modified Paths: -------------- gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/Parser.java gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneDocument.java gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneIndexer.java gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearchThread.java gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearcher.java Modified: gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/Parser.java =================================================================== --- gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/Parser.java 2016-10-11 01:22:31 UTC (rev 19670) +++ gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/Parser.java 2016-10-11 09:06:31 UTC (rev 19671) @@ -10,10 +10,8 @@ import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; -import java.util.Iterator; import java.util.List; import java.util.Map; -import java.util.Set; import org.jdom.Element; import org.jdom.JDOMException; Modified: gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneDocument.java =================================================================== --- gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneDocument.java 2016-10-11 01:22:31 UTC (rev 19670) +++ gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneDocument.java 2016-10-11 09:06:31 UTC (rev 19671) @@ -479,7 +479,7 @@ if(string.trim().length() > 0) { features.put("string", string); try { - set.add(new Long(start), new Long(i), Constants.ANNIC_TOKEN, + set.add(Long.valueOf(start), Long.valueOf(i), Constants.ANNIC_TOKEN, features); } catch(InvalidOffsetException ioe) { @@ -501,7 +501,7 @@ if(string.trim().length() > 0) { features.put("string", string); try { - set.add(new Long(start), new Long(gateContent.length()), + set.add(Long.valueOf(start), Long.valueOf(gateContent.length()), Constants.ANNIC_TOKEN, features); } catch(InvalidOffsetException ioe) { @@ -541,27 +541,27 @@ File locationFile = new File(location); File folder = new File(locationFile, Constants.SERIALIZED_FOLDER_NAME); if(!folder.exists()) { - folder.mkdirs(); + if (!folder.mkdirs()) { + throw new IOException( + "Directory could not be created :" + folder.getAbsolutePath()); + } } - if(!folder.exists()) { throw new IOException( - "Directory could not be created :" + folder.getAbsolutePath()); } folder = new File(folder, folderName); if(!folder.exists()) { - folder.mkdirs(); + if (!folder.mkdirs()){ + throw new IOException( + "Directory could not be created :" + folder.getAbsolutePath()); + } } - if(!folder.exists()) { throw new IOException( - "Directory could not be created :" + folder.getAbsolutePath()); } + File outputFile = new File(folder, fileName + ".annic"); + try (OutputStream file = new FileOutputStream(outputFile); + OutputStream buffer = new BufferedOutputStream(file); + ObjectOutput output = new ObjectOutputStream(buffer);) { - File outputFile = new File(folder, fileName + ".annic"); - ObjectOutput output = null; - OutputStream file = new FileOutputStream(outputFile); - OutputStream buffer = new BufferedOutputStream(file); - output = new ObjectOutputStream(buffer); - output.writeObject(tokenStream); - if(output != null) { - output.close(); + output.writeObject(tokenStream); + output.flush(); } } @@ -571,7 +571,7 @@ * @author niraj * */ - private class OffsetGroup { + private static class OffsetGroup { Long startOffset; Long endOffset; @@ -607,7 +607,7 @@ // the index Unit Annotation Type is not specified // therefore we consider the entire document as a single unit OffsetGroup group = new OffsetGroup(); - group.startOffset = new Long(0); + group.startOffset = 0L; group.endOffset = document.getContent().size(); unitOffsetsSet.add(group); } @@ -674,7 +674,7 @@ group.endOffset)); } - if(tokens == null || tokens.size() == 0) return null; + if(tokens.isEmpty()) return null; Collections.sort(tokens, new OffsetComparator()); @@ -694,11 +694,7 @@ int endOffset = annot.getEndNode().getOffset().intValue(); String text = document.getContent().toString().substring(startOffset, endOffset); - if(text == null) { - continue; - } - - + Token token1 = new Token(type, startOffset, endOffset, "*"); // each token has four values Modified: gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneIndexer.java =================================================================== --- gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneIndexer.java 2016-10-11 01:22:31 UTC (rev 19670) +++ gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneIndexer.java 2016-10-11 09:06:31 UTC (rev 19671) @@ -19,6 +19,8 @@ import java.util.Map; import java.util.Set; +import org.apache.commons.io.FileUtils; + import gate.creole.annic.Constants; import gate.creole.annic.IndexException; import gate.creole.annic.Indexer; @@ -98,7 +100,12 @@ String baseTokenAnnotationType = (String)parameters .get(Constants.BASE_TOKEN_ANNOTATION_TYPE); - if(baseTokenAnnotationType.indexOf(".") > -1 || baseTokenAnnotationType.indexOf("=") > -1 + + if(baseTokenAnnotationType == null || baseTokenAnnotationType.trim().length() == 0) { + baseTokenAnnotationType = Constants.ANNIC_TOKEN; + parameters.put(Constants.BASE_TOKEN_ANNOTATION_TYPE, + Constants.ANNIC_TOKEN); + } else if(baseTokenAnnotationType.indexOf(".") > -1 || baseTokenAnnotationType.indexOf("=") > -1 || baseTokenAnnotationType.indexOf(";") > -1 || baseTokenAnnotationType.indexOf(",") > -1) { throw new IndexException( "Base token annotation type cannot have '.' , '=', ',' or ';; in it"); @@ -110,14 +117,7 @@ if(DEBUG) { System.out.println("BTAT : " + baseTokenAnnotationType); System.out.println("IUAT : " + indexUnitAnnotationType); - } - - if(baseTokenAnnotationType == null - || baseTokenAnnotationType.trim().length() == 0) { - baseTokenAnnotationType = Constants.ANNIC_TOKEN; - parameters.put(Constants.BASE_TOKEN_ANNOTATION_TYPE, - Constants.ANNIC_TOKEN); - } + } } /** @@ -227,9 +227,10 @@ /** Deletes the index. */ @Override public void deleteIndex() throws IndexException { - boolean isDeleted = true; + if(parameters == null) return; File dir = null; + //TODO should we use the gate util Files mehotd for this try { dir = new File(((URL)parameters.get(Constants.INDEX_LOCATION_URL)) .toURI()); @@ -237,23 +238,8 @@ dir = new File(((URL)parameters.get(Constants.INDEX_LOCATION_URL)) .getFile()); } - - if(dir.exists() && dir.isDirectory()) { - File[] files = dir.listFiles(); - for(int i = 0; i < files.length; i++) { - File f = files[i]; - if(f.isDirectory()) { - File[] subFiles = f.listFiles(); - for(int j = 0; j < subFiles.length; j++) { - File sf = subFiles[j]; - sf.delete(); - } - } - f.delete(); - } - } - isDeleted = dir.delete(); - if(!isDeleted) { + + if(!FileUtils.deleteQuietly(dir)) { throw new IndexException("Can't delete directory" + dir.getAbsolutePath()); } } @@ -267,6 +253,7 @@ throws IndexException { String location = null; + //TODO should we use the gate util Files mehotd for this try { location = new File(((URL)parameters.get(Constants.INDEX_LOCATION_URL)) .toURI()).getAbsolutePath(); @@ -421,7 +408,7 @@ .get(Constants.ANNOTATION_SETS_NAMES_TO_INCLUDE)); Boolean createTokensAutomatically = (Boolean) parameters.get(Constants.CREATE_TOKENS_AUTOMATICALLY); - if(createTokensAutomatically == null) createTokensAutomatically = new Boolean(true); + if(createTokensAutomatically == null) createTokensAutomatically = Boolean.TRUE; String idToUse = gateDoc.getLRPersistenceId() == null ? gateDoc.getName() @@ -515,18 +502,19 @@ java.io.FileWriter fileWriter = new java.io.FileWriter(file); Map<String,Object> indexInformation = new HashMap<String,Object>(); - Iterator<String> iter = parameters.keySet().iterator(); - while(iter.hasNext()) { - String key = iter.next(); + //Iterator<String> iter = parameters.keySet().iterator(); + //while(iter.hasNext()) { + for (Map.Entry<String, Object> entry : parameters.entrySet()){ + String key = entry.getKey(); if(key.equals(Constants.INDEX_LOCATION_URL)) continue; - indexInformation.put(key, parameters.get(key)); + indexInformation.put(key, entry.getValue()); } indexInformation.put(Constants.CORPUS_INDEX_FEATURE, Constants.CORPUS_INDEX_FEATURE_VALUE); if(corpus != null) - indexInformation.put(Constants.CORPUS_SIZE, new Integer(corpus - .getDocumentNames().size())); + indexInformation.put(Constants.CORPUS_SIZE, corpus + .getDocumentNames().size()); // we would use XStream library to store annic patterns com.thoughtworks.xstream.XStream xstream = new com.thoughtworks.xstream.XStream(); Modified: gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearchThread.java =================================================================== --- gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearchThread.java 2016-10-11 01:22:31 UTC (rev 19670) +++ gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearchThread.java 2016-10-11 09:06:31 UTC (rev 19671) @@ -10,6 +10,8 @@ import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; +import java.io.FileReader; +import java.io.IOException; import java.io.InputStream; import java.io.ObjectInput; import java.io.ObjectInputStream; @@ -19,9 +21,12 @@ import java.util.List; import java.util.Map; +import com.thoughtworks.xstream.XStream; +import com.thoughtworks.xstream.io.xml.StaxDriver; + +import gate.creole.annic.Constants; import gate.creole.annic.Pattern; import gate.creole.annic.PatternAnnotation; -import gate.creole.annic.Constants; import gate.creole.annic.SearchException; import gate.creole.annic.apache.lucene.search.Hits; import gate.creole.annic.apache.lucene.search.Query; @@ -169,16 +174,17 @@ try { // first find out the location of Index - String temp = ""; + //TODO does this just replace \ with / if so we should do this better + StringBuilder temp = new StringBuilder(); for(int i = 0; i < indexLocation.length(); i++) { if(indexLocation.charAt(i) == '\\') { - temp += "/"; + temp.append("/"); } else { - temp += indexLocation.charAt(i); + temp.append(indexLocation.charAt(i)); } } - indexLocation = temp; + indexLocation = temp.toString(); /* * for each different location there can be different @@ -208,21 +214,16 @@ return false; } - java.io.FileReader fileReader = new java.io.FileReader(indexLocation - + "LuceneIndexDefinition.xml"); + Map<String,Object> indexInformation = null; + + // other wise read this file + XStream xstream = new XStream(new StaxDriver()); + try (FileReader fileReader = + new FileReader(indexLocation + "LuceneIndexDefinition.xml");) { - Map<String,Object> indexInformation = null; - try { - // other wise read this file - com.thoughtworks.xstream.XStream xstream = new com.thoughtworks.xstream.XStream( - new com.thoughtworks.xstream.io.xml.StaxDriver()); - // Saving was accomplished by using XML serialization of the map. - indexInformation = (Map<String,Object>)xstream.fromXML(fileReader); + indexInformation = (Map<String, Object>)xstream.fromXML(fileReader); } - finally { - fileReader.close(); - } // find out if the current index was indexed by annicIndexPR String indexedWithANNICIndexPR = (String)indexInformation @@ -293,7 +294,7 @@ // iterate through each result and collect necessary // information for(int hitIndex = 0; hitIndex < hits.length(); hitIndex++) { - int index = firstTermPositions[0].indexOf(new Integer(hits + int index = firstTermPositions[0].indexOf(Integer.valueOf(hits .id(hitIndex))); // we fetch all the first term positions for the query @@ -354,8 +355,7 @@ if(searchResultInfoMap.size() > 0) success = true; else success = false; - } - catch(Exception e) { + } catch(IOException | gate.creole.ir.SearchException e) { throw new SearchException(e); } @@ -446,7 +446,7 @@ * if none of the found patterns is valid continue with the next * query */ - if(patternResult == null || patternResult.numberOfPatterns == 0) + if(patternResult.numberOfPatterns == 0) continue; /* @@ -498,9 +498,9 @@ List<Pattern> pats = locatePatterns((String)aResult.getDocumentID(), aResult.getAnnotationSetName(), aResult.getGateAnnotations(), firstTermPositions, patternLength, aResult.getQuery()); - if(pats != null) { - annicPatterns.addAll(pats); - } + + annicPatterns.addAll(pats); + } return annicPatterns; } @@ -647,19 +647,19 @@ File folder = new File(indexDirectory, Constants.SERIALIZED_FOLDER_NAME); folder = new File(folder, documentFolder); File fileToLoad = new File(folder, documentID + ".annic"); - InputStream file = new FileInputStream(fileToLoad); - InputStream buffer = new BufferedInputStream(file); - ObjectInput input = new ObjectInputStream(buffer); + + try (InputStream file = new FileInputStream(fileToLoad); + InputStream buffer = new BufferedInputStream(file); + ObjectInput input = new ObjectInputStream(buffer);) { - // deserialize the List - @SuppressWarnings("unchecked") - List<gate.creole.annic.apache.lucene.analysis.Token> recoveredTokenStream = - (List<gate.creole.annic.apache.lucene.analysis.Token>)input.readObject(); - if(input != null) { - // close "input" and its underlying streams - input.close(); - } - return recoveredTokenStream; + // deserialize the List + @SuppressWarnings("unchecked") + List<gate.creole.annic.apache.lucene.analysis.Token> recoveredTokenStream = + (List<gate.creole.annic.apache.lucene.analysis.Token>)input + .readObject(); + + return recoveredTokenStream; + } } /** @@ -709,13 +709,13 @@ // if annotType == "*", the query was {AnnotType} if(annotType.equals("*")) { if(type.equals(annotText) && annotType.equals(text)) { - positions.add(new Integer(token.getPosition())); + positions.add(token.getPosition()); } } // the query is Token == "string" else { if(annotText.equals(type) && annotType.equals(text)) { - positions.add(new Integer(token.getPosition())); + positions.add(token.getPosition()); } } } @@ -871,7 +871,7 @@ } } // we send the endoffset to our GUI class - patLens.add(new Integer(upto)); + patLens.add(upto); /* * k holds the position of the first token in right context @@ -900,7 +900,7 @@ tempPos = token.getPosition(); } } - patLens.add(new Integer(upto)); + patLens.add(upto); k++; } int maxEndOffset = upto; @@ -1021,7 +1021,7 @@ * * @author niraj */ - private class PatternResult { + private static class PatternResult { int numberOfPatterns; List<List<PatternAnnotation>> gateAnnotations; @@ -1040,7 +1040,7 @@ * @author niraj * */ - private class QueryItem { + private static class QueryItem { @SuppressWarnings("unused") float score; Modified: gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearcher.java =================================================================== --- gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearcher.java 2016-10-11 01:22:31 UTC (rev 19670) +++ gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearcher.java 2016-10-11 09:06:31 UTC (rev 19671) @@ -327,7 +327,7 @@ * Gets the number of base token annotations to show in the context. */ public Integer getContextWindow() { - return new Integer(this.contextWindow); + return this.contextWindow; } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs