Revision: 19671
          http://sourceforge.net/p/gate/code/19671
Author:   markagreenwood
Date:     2016-10-11 09:06:31 +0000 (Tue, 11 Oct 2016)
Log Message:
-----------
some more bug/performance fixes

Modified Paths:
--------------
    gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/Parser.java
    
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneDocument.java
    
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneIndexer.java
    
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearchThread.java
    
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearcher.java

Modified: 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/Parser.java
===================================================================
--- 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/Parser.java    
    2016-10-11 01:22:31 UTC (rev 19670)
+++ 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/Parser.java    
    2016-10-11 09:06:31 UTC (rev 19671)
@@ -10,10 +10,8 @@
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 
 import org.jdom.Element;
 import org.jdom.JDOMException;

Modified: 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneDocument.java
===================================================================
--- 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneDocument.java
 2016-10-11 01:22:31 UTC (rev 19670)
+++ 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneDocument.java
 2016-10-11 09:06:31 UTC (rev 19671)
@@ -479,7 +479,7 @@
           if(string.trim().length() > 0) {
             features.put("string", string);
             try {
-              set.add(new Long(start), new Long(i), Constants.ANNIC_TOKEN,
+              set.add(Long.valueOf(start), Long.valueOf(i), 
Constants.ANNIC_TOKEN,
                 features);
             }
             catch(InvalidOffsetException ioe) {
@@ -501,7 +501,7 @@
       if(string.trim().length() > 0) {
         features.put("string", string);
         try {
-          set.add(new Long(start), new Long(gateContent.length()),
+          set.add(Long.valueOf(start), Long.valueOf(gateContent.length()),
             Constants.ANNIC_TOKEN, features);
         }
         catch(InvalidOffsetException ioe) {
@@ -541,27 +541,27 @@
     File locationFile = new File(location);
     File folder = new File(locationFile, Constants.SERIALIZED_FOLDER_NAME);
     if(!folder.exists()) {
-      folder.mkdirs();
+      if (!folder.mkdirs()) {
+        throw new IOException(
+            "Directory could not be created :" + folder.getAbsolutePath()); 
+      }
     }
-    if(!folder.exists()) { throw new IOException(
-      "Directory could not be created :" + folder.getAbsolutePath()); }
 
     folder = new File(folder, folderName);
     if(!folder.exists()) {
-      folder.mkdirs();
+      if (!folder.mkdirs()){
+        throw new IOException(
+            "Directory could not be created :" + folder.getAbsolutePath());
+      }
     }
 
-    if(!folder.exists()) { throw new IOException(
-      "Directory could not be created :" + folder.getAbsolutePath()); }
+    File outputFile = new File(folder, fileName + ".annic");
+    try (OutputStream file = new FileOutputStream(outputFile);
+        OutputStream buffer = new BufferedOutputStream(file);
+        ObjectOutput output = new ObjectOutputStream(buffer);) {
 
-    File outputFile = new File(folder, fileName + ".annic");
-    ObjectOutput output = null;
-    OutputStream file = new FileOutputStream(outputFile);
-    OutputStream buffer = new BufferedOutputStream(file);
-    output = new ObjectOutputStream(buffer);
-    output.writeObject(tokenStream);
-    if(output != null) {
-      output.close();
+      output.writeObject(tokenStream);
+      output.flush();
     }
   }
 
@@ -571,7 +571,7 @@
    * @author niraj
    * 
    */
-  private class OffsetGroup {
+  private static class OffsetGroup {
     Long startOffset;
 
     Long endOffset;
@@ -607,7 +607,7 @@
       // the index Unit Annotation Type is not specified
       // therefore we consider the entire document as a single unit
       OffsetGroup group = new OffsetGroup();
-      group.startOffset = new Long(0);
+      group.startOffset = 0L;
       group.endOffset = document.getContent().size();
       unitOffsetsSet.add(group);
     }
@@ -674,7 +674,7 @@
           group.endOffset));
       }
 
-      if(tokens == null || tokens.size() == 0) return null;
+      if(tokens.isEmpty()) return null;
 
       Collections.sort(tokens, new OffsetComparator());
 
@@ -694,11 +694,7 @@
         int endOffset = annot.getEndNode().getOffset().intValue();
         String text =
           document.getContent().toString().substring(startOffset, endOffset);
-        if(text == null) {
-          continue;
-        }
-
-
+       
         Token token1 = new Token(type, startOffset, endOffset, "*");
 
         // each token has four values

Modified: 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneIndexer.java
===================================================================
--- 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneIndexer.java
  2016-10-11 01:22:31 UTC (rev 19670)
+++ 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneIndexer.java
  2016-10-11 09:06:31 UTC (rev 19671)
@@ -19,6 +19,8 @@
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.io.FileUtils;
+
 import gate.creole.annic.Constants;
 import gate.creole.annic.IndexException;
 import gate.creole.annic.Indexer;
@@ -98,7 +100,12 @@
 
     String baseTokenAnnotationType = (String)parameters
             .get(Constants.BASE_TOKEN_ANNOTATION_TYPE);
-    if(baseTokenAnnotationType.indexOf(".") > -1 || 
baseTokenAnnotationType.indexOf("=") > -1
+    
+    if(baseTokenAnnotationType == null || 
baseTokenAnnotationType.trim().length() == 0) {
+      baseTokenAnnotationType = Constants.ANNIC_TOKEN;
+      parameters.put(Constants.BASE_TOKEN_ANNOTATION_TYPE,
+              Constants.ANNIC_TOKEN);
+    } else if(baseTokenAnnotationType.indexOf(".") > -1 || 
baseTokenAnnotationType.indexOf("=") > -1
         || baseTokenAnnotationType.indexOf(";") > -1 || 
baseTokenAnnotationType.indexOf(",") > -1) {
       throw new IndexException(
       "Base token annotation type cannot have '.' , '=', ',' or ';; in it");
@@ -110,14 +117,7 @@
     if(DEBUG) {
       System.out.println("BTAT : " + baseTokenAnnotationType);
       System.out.println("IUAT : " + indexUnitAnnotationType);
-    }
-
-    if(baseTokenAnnotationType == null
-            || baseTokenAnnotationType.trim().length() == 0) {
-      baseTokenAnnotationType = Constants.ANNIC_TOKEN;
-      parameters.put(Constants.BASE_TOKEN_ANNOTATION_TYPE,
-              Constants.ANNIC_TOKEN);
-    }
+    }   
   }
 
   /**
@@ -227,9 +227,10 @@
   /** Deletes the index. */
   @Override
   public void deleteIndex() throws IndexException {
-    boolean isDeleted = true;
+
     if(parameters == null) return;
     File dir = null;
+    //TODO should we use the gate util Files method for this
     try {
       dir = new File(((URL)parameters.get(Constants.INDEX_LOCATION_URL))
               .toURI());
@@ -237,23 +238,8 @@
       dir = new File(((URL)parameters.get(Constants.INDEX_LOCATION_URL))
               .getFile());
     }
-
-    if(dir.exists() && dir.isDirectory()) {
-      File[] files = dir.listFiles();
-      for(int i = 0; i < files.length; i++) {
-        File f = files[i];
-        if(f.isDirectory()) {
-          File[] subFiles = f.listFiles();
-          for(int j = 0; j < subFiles.length; j++) {
-            File sf = subFiles[j];
-            sf.delete();
-          }
-        }
-        f.delete();
-      }
-    }
-    isDeleted = dir.delete();
-    if(!isDeleted) {
+   
+    if(!FileUtils.deleteQuietly(dir)) {
       throw new IndexException("Can't delete directory" + 
dir.getAbsolutePath());
     }
   }
@@ -267,6 +253,7 @@
           throws IndexException {
 
     String location = null;
+    //TODO should we use the gate util Files method for this
     try {
       location = new File(((URL)parameters.get(Constants.INDEX_LOCATION_URL))
               .toURI()).getAbsolutePath();
@@ -421,7 +408,7 @@
             .get(Constants.ANNOTATION_SETS_NAMES_TO_INCLUDE));
 
     Boolean createTokensAutomatically = (Boolean) 
parameters.get(Constants.CREATE_TOKENS_AUTOMATICALLY);
-    if(createTokensAutomatically == null) createTokensAutomatically = new 
Boolean(true);
+    if(createTokensAutomatically == null) createTokensAutomatically = 
Boolean.TRUE;
     
     String idToUse = gateDoc.getLRPersistenceId() == null
             ? gateDoc.getName()
@@ -515,18 +502,19 @@
 
     java.io.FileWriter fileWriter = new java.io.FileWriter(file);
     Map<String,Object> indexInformation = new HashMap<String,Object>();
-    Iterator<String> iter = parameters.keySet().iterator();
-    while(iter.hasNext()) {
-      String key = iter.next();
+    //Iterator<String> iter = parameters.keySet().iterator();
+    //while(iter.hasNext()) {
+    for (Map.Entry<String, Object> entry : parameters.entrySet()){
+      String key = entry.getKey();
       if(key.equals(Constants.INDEX_LOCATION_URL)) continue;
-      indexInformation.put(key, parameters.get(key));
+      indexInformation.put(key, entry.getValue());
     }
 
     indexInformation.put(Constants.CORPUS_INDEX_FEATURE,
             Constants.CORPUS_INDEX_FEATURE_VALUE);
     if(corpus != null)
-      indexInformation.put(Constants.CORPUS_SIZE, new Integer(corpus
-              .getDocumentNames().size()));
+      indexInformation.put(Constants.CORPUS_SIZE, corpus
+              .getDocumentNames().size());
 
     // we would use XStream library to store annic patterns
     com.thoughtworks.xstream.XStream xstream = new 
com.thoughtworks.xstream.XStream();

Modified: 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearchThread.java
===================================================================
--- 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearchThread.java
     2016-10-11 01:22:31 UTC (rev 19670)
+++ 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearchThread.java
     2016-10-11 09:06:31 UTC (rev 19671)
@@ -10,6 +10,8 @@
 import java.io.BufferedInputStream;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.IOException;
 import java.io.InputStream;
 import java.io.ObjectInput;
 import java.io.ObjectInputStream;
@@ -19,9 +21,12 @@
 import java.util.List;
 import java.util.Map;
 
+import com.thoughtworks.xstream.XStream;
+import com.thoughtworks.xstream.io.xml.StaxDriver;
+
+import gate.creole.annic.Constants;
 import gate.creole.annic.Pattern;
 import gate.creole.annic.PatternAnnotation;
-import gate.creole.annic.Constants;
 import gate.creole.annic.SearchException;
 import gate.creole.annic.apache.lucene.search.Hits;
 import gate.creole.annic.apache.lucene.search.Query;
@@ -169,16 +174,17 @@
 
     try {
       // first find out the location of Index
-      String temp = "";
+      //TODO does this just replace \ with /? If so, we should do this better
+      StringBuilder temp = new StringBuilder();
       for(int i = 0; i < indexLocation.length(); i++) {
         if(indexLocation.charAt(i) == '\\') {
-          temp += "/";
+          temp.append("/");
         }
         else {
-          temp += indexLocation.charAt(i);
+          temp.append(indexLocation.charAt(i));
         }
       }
-      indexLocation = temp;
+      indexLocation = temp.toString();
 
       /*
        * for each different location there can be different
@@ -208,21 +214,16 @@
         return false;
       }
 
-      java.io.FileReader fileReader = new java.io.FileReader(indexLocation
-              + "LuceneIndexDefinition.xml");
+      Map<String,Object> indexInformation = null;
+      
+      // otherwise read this file
+      XStream xstream = new XStream(new StaxDriver());
+      try (FileReader fileReader =
+          new FileReader(indexLocation + "LuceneIndexDefinition.xml");) {
 
-      Map<String,Object> indexInformation = null;
-      try {
-        // other wise read this file
-        com.thoughtworks.xstream.XStream xstream = new 
com.thoughtworks.xstream.XStream(
-                new com.thoughtworks.xstream.io.xml.StaxDriver());
-  
         // Saving was accomplished by using XML serialization of the map.
-        indexInformation = (Map<String,Object>)xstream.fromXML(fileReader);
+        indexInformation = (Map<String, Object>)xstream.fromXML(fileReader);
       }
-      finally {
-        fileReader.close();
-      }
 
       // find out if the current index was indexed by annicIndexPR
       String indexedWithANNICIndexPR = (String)indexInformation
@@ -293,7 +294,7 @@
           // iterate through each result and collect necessary
           // information
           for(int hitIndex = 0; hitIndex < hits.length(); hitIndex++) {
-            int index = firstTermPositions[0].indexOf(new Integer(hits
+            int index = firstTermPositions[0].indexOf(Integer.valueOf(hits
                     .id(hitIndex)));
   
             // we fetch all the first term positions for the query
@@ -354,8 +355,7 @@
       if(searchResultInfoMap.size() > 0)
         success = true;
       else success = false;
-    }
-    catch(Exception e) {
+    } catch(IOException | gate.creole.ir.SearchException e) {
       throw new SearchException(e);
     }
 
@@ -446,7 +446,7 @@
          * if none of the found patterns is valid continue with the next
          * query
          */
-        if(patternResult == null || patternResult.numberOfPatterns == 0)
+        if(patternResult.numberOfPatterns == 0)
           continue;
 
         /*
@@ -498,9 +498,9 @@
       List<Pattern> pats = locatePatterns((String)aResult.getDocumentID(),
               aResult.getAnnotationSetName(), aResult.getGateAnnotations(),
               firstTermPositions, patternLength, aResult.getQuery());
-      if(pats != null) {
-        annicPatterns.addAll(pats);
-      }
+      
+      annicPatterns.addAll(pats);
+      
     }
     return annicPatterns;
   }
@@ -647,19 +647,19 @@
     File folder = new File(indexDirectory, Constants.SERIALIZED_FOLDER_NAME);
     folder = new File(folder, documentFolder);
     File fileToLoad = new File(folder, documentID + ".annic");
-    InputStream file = new FileInputStream(fileToLoad);
-    InputStream buffer = new BufferedInputStream(file);
-    ObjectInput input = new ObjectInputStream(buffer);
+    
+    try (InputStream file = new FileInputStream(fileToLoad);
+        InputStream buffer = new BufferedInputStream(file);
+        ObjectInput input = new ObjectInputStream(buffer);) {
 
-    // deserialize the List
-    @SuppressWarnings("unchecked")
-    List<gate.creole.annic.apache.lucene.analysis.Token> recoveredTokenStream 
= 
-      (List<gate.creole.annic.apache.lucene.analysis.Token>)input.readObject();
-    if(input != null) {
-      // close "input" and its underlying streams
-      input.close();
-    }
-    return recoveredTokenStream;
+      // deserialize the List
+      @SuppressWarnings("unchecked")
+      List<gate.creole.annic.apache.lucene.analysis.Token> 
recoveredTokenStream =
+          (List<gate.creole.annic.apache.lucene.analysis.Token>)input
+              .readObject();
+
+      return recoveredTokenStream;
+    }    
   }
 
   /**
@@ -709,13 +709,13 @@
         // if annotType == "*", the query was {AnnotType}
         if(annotType.equals("*")) {
           if(type.equals(annotText) && annotType.equals(text)) {
-            positions.add(new Integer(token.getPosition()));
+            positions.add(token.getPosition());
           }
         }
         // the query is Token == "string"
         else {
           if(annotText.equals(type) && annotType.equals(text)) {
-            positions.add(new Integer(token.getPosition()));
+            positions.add(token.getPosition());
           }
         }
       }
@@ -871,7 +871,7 @@
               }
             }
             // we send the endoffset to our GUI class
-            patLens.add(new Integer(upto));
+            patLens.add(upto);
 
             /*
              * k holds the position of the first token in right context
@@ -900,7 +900,7 @@
             tempPos = token.getPosition();
           }
         }
-        patLens.add(new Integer(upto));
+        patLens.add(upto);
         k++;
       }
       int maxEndOffset = upto;
@@ -1021,7 +1021,7 @@
    * 
    * @author niraj
    */
-  private class PatternResult {
+  private static class PatternResult {
     int numberOfPatterns;
 
     List<List<PatternAnnotation>> gateAnnotations;
@@ -1040,7 +1040,7 @@
    * @author niraj
    * 
    */
-  private class QueryItem {
+  private static class QueryItem {
     @SuppressWarnings("unused")
     float score;
 

Modified: 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearcher.java
===================================================================
--- 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearcher.java
 2016-10-11 01:22:31 UTC (rev 19670)
+++ 
gate/branches/sawdust2/gate-core/src/main/java/gate/creole/annic/lucene/LuceneSearcher.java
 2016-10-11 09:06:31 UTC (rev 19671)
@@ -327,7 +327,7 @@
    * Gets the number of base token annotations to show in the context.
    */
   public Integer getContextWindow() {
-    return new Integer(this.contextWindow);
+    return this.contextWindow;
   }
 
   /**

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to