Revision: 15929
          http://gate.svn.sourceforge.net/gate/?rev=15929&view=rev
Author:   valyt
Date:     2012-07-13 11:01:17 +0000 (Fri, 13 Jul 2012)
Log Message:
-----------
Support for requesting direct indexes in IndexConfig. If enabled, direct 
indexes are created during the index close operation.

Modified Paths:
--------------
    mimir/trunk/mimir-core/src/gate/mimir/IndexConfig.java
    mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MentionsIndexBuilder.java
    mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MimirDirectIndexBuilder.java
    mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MimirIndexBuilder.java
    mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/TokenIndexBuilder.java

Modified: mimir/trunk/mimir-core/src/gate/mimir/IndexConfig.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/IndexConfig.java      2012-07-13 03:17:22 UTC (rev 15928)
+++ mimir/trunk/mimir-core/src/gate/mimir/IndexConfig.java      2012-07-13 11:01:17 UTC (rev 15929)
@@ -47,10 +47,41 @@
  * Interface for indexer configurations.
  */
 public class IndexConfig implements Serializable {
+  
   /**
+   * Base class for indexer configs
+   */
+  public static class IndexerConfig implements Serializable {
+    
+    /**
+     * Serialisation ID.
+     */
+    private static final long serialVersionUID = -3980825689154182192L;
+
+    public IndexerConfig(boolean directIndexEnabled) {
+      super();
+      this.directIndexEnabled = directIndexEnabled;
+    }
+
+    /**
+     * Should a direct index be also built?
+     */
+    private boolean directIndexEnabled = false;
+    
+    /**
+     * Should a direct index be also built?
+     * @return <code>true</code> if a direct index was requested.
+     */
+    public boolean isDirectIndexEnabled() {
+      return directIndexEnabled;
+    }
+
+  }
+  
+  /**
    * Object storing the configuration for a Token indexer.
    */
-  public static class TokenIndexerConfig implements Serializable {
+  public static class TokenIndexerConfig extends IndexerConfig {
     /**
      * Serialisation ID.
      */
@@ -76,8 +107,11 @@
      *          The {@link TermProcessor} to be used by this indexer. If
      *          <code>null</code> is given, then a {@link NullTermProcessor} is
      *          used.
+     * @param directIndexEnabled should a direct index also be built?         
      */
-    public TokenIndexerConfig(String featureName, TermProcessor termProcessor) {
+    public TokenIndexerConfig(String featureName, TermProcessor termProcessor, 
+                              boolean directIndexEnabled) {
+      super(directIndexEnabled);
       this.featureName = featureName;
       this.termProcessor =
               termProcessor == null
@@ -109,7 +143,7 @@
   /**
    * Object storing the configuration for a semantic annotation indexer.
    */
-  public static class SemanticIndexerConfig implements Serializable {
+  public static class SemanticIndexerConfig extends IndexerConfig {
     /**
      * Serialisation ID.
      */
@@ -136,9 +170,11 @@
      *          indexer.
      * @param helper
      *          the {@link SemanticAnnotationHelper}s used by this indexer.
+     * @param directIndexEnabled should a direct index also be built?         
      */
     public SemanticIndexerConfig(String[] annotationTypes,
-            SemanticAnnotationHelper[] helpers) {
+            SemanticAnnotationHelper[] helpers, boolean directIndexEnabled) {
+      super(directIndexEnabled);
       this.annotationTypes = annotationTypes;
       this.helpers = helpers;
     }

Modified: mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MentionsIndexBuilder.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MentionsIndexBuilder.java  2012-07-13 03:17:22 UTC (rev 15928)
+++ mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MentionsIndexBuilder.java  2012-07-13 11:01:17 UTC (rev 15929)
@@ -59,7 +59,8 @@
   public MentionsIndexBuilder(BlockingQueue<GATEDocument> inputQueue,
           BlockingQueue<GATEDocument> outputQueue,
           Indexer indexer, String baseName, SemanticIndexerConfig config){
-    super(inputQueue, outputQueue, indexer, baseName);
+    super(inputQueue, outputQueue, indexer, baseName, 
+        config.isDirectIndexEnabled());
     //get the helpers
     annotationHelpers = new HashMap<String, SemanticAnnotationHelper>(
               config.getAnnotationTypes().length);

Modified: mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MimirDirectIndexBuilder.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MimirDirectIndexBuilder.java       2012-07-13 03:17:22 UTC (rev 15928)
+++ mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MimirDirectIndexBuilder.java       2012-07-13 11:01:17 UTC (rev 15929)
@@ -41,6 +41,11 @@
 
   private static Logger logger = Logger.getLogger(MimirDirectIndexBuilder.class);
   
+  /**
+   * The progress of the index building operation. 
+   */
+  private volatile double buildProgress = 0;
+  
   protected String inputSubindexBasename;
   
   protected static final String BASENAME_SUFFIX = "-dir";
@@ -102,6 +107,8 @@
    */
   @Override
   public void run() {
+    buildProgress = 0;
+    double lastProgress = buildProgress;
     // input documentIDs become output termIDs
     // input termIDs become output documentIDs
     // NB: the variables in this method are named based on output semantics! 
@@ -191,25 +198,37 @@
         // and move to the next input term (output 'document')
         inputTermIterator = inputIndexReader.nextIterator();
         termsProcessed++;
-        if(termsProcessed % 1000 == 0) {
-          logger.debug("Processed " + 
-              percentNF.format((double)termsProcessed / inputIndex.numberOfTerms) + 
-              " terms");  
+        buildProgress = (double)termsProcessed / inputIndex.numberOfTerms;
+        if(buildProgress - lastProgress >= 1) {
+          logger.debug("Direct index  " +  percentNF.format(buildProgress) + 
+              " built.");
+          lastProgress = buildProgress;
         }
-        
       }
       inputIndexReader.close();
       // dump the last current batch
       flush();
+      buildProgress = 1;
       // close the index (combine the batches)
       close();
-      progressLogger.done();              
+      progressLogger.done();
     } catch(Exception e) {
       throw new GateRuntimeException("Exception during indexing!", e);
     }
   }
   
   /**
+   * Returns a value between 0 and 1, representing the amount of work already 
+   * performed for the index building operation. Building a large index can be 
+   * very lengthy operation; this method can be called regularly to obtain an 
+   * indication of progress. 
+   * @return a double value
+   */
+  public double getProgress() {
+    return (closingProgress + buildProgress) / 2;
+  }
+  
+  /**
    * Converts a long value into a String containing a zero-padded Hex 
    * representation of the input value. The lexicographic ordering of the 
    * generated strings is the same as the natural order of the corresponding

Modified: mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MimirIndexBuilder.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MimirIndexBuilder.java     2012-07-13 03:17:22 UTC (rev 15928)
+++ mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/MimirIndexBuilder.java     2012-07-13 11:01:17 UTC (rev 15929)
@@ -128,7 +128,6 @@
     public PostingsList(byte[] a, boolean differential,
                         Completeness completeness) {
       super(a, differential, completeness);
-      // TODO Auto-generated constructor stub
     }
     
     public void setCount(int count) {
@@ -207,18 +206,31 @@
   protected boolean closed = false;
   
   protected boolean savePositions = true;
+
   /**
    * A value between 0 and 1 representing the progress of the current index 
    * closing operation. 
    */
   protected volatile double closingProgress = 0.0;
+
+  /**
+   * Builder used to construct the direct index (if requested) by inverting the
+   * inverted index.
+   */
+  protected MimirDirectIndexBuilder directIndexBuilder;
   
   /**
    * The index configuration.
    */
   protected IndexConfig indexConfig;
   
+  
   /**
+   * Should a direct index also be built?
+   */
+  protected boolean directIndexEnabled;
+  
+  /**
    * The current document pointer (gets incremented for each document).
    */
   protected long documentPointer;
@@ -353,12 +365,14 @@
   public MimirIndexBuilder(BlockingQueue<GATEDocument> inputQueue,
           BlockingQueue<GATEDocument> outputQueue,
           Indexer indexer, 
-          String baseName) {
+          String baseName,
+          boolean directIndexEnabled) {
     this.inputQueue = inputQueue;
     this.outputQueue = outputQueue;
     this.indexer = indexer;
     this.indexConfig = indexer.getIndexConfig();
     this.indexBaseName = baseName;
+    this.directIndexEnabled = directIndexEnabled;
     
     // create the progress logger.  We use this.getClass to use the
     // logger belonging to a subclass rather than our own.
@@ -572,7 +586,7 @@
       //new term -> create a new postings list.
       termMap.put( currentTerm.copy(), 
               termPostings = new PostingsList( new byte[ 32 ], true, 
-                  Completeness.POINTERS));
+                  Completeness.POSITIONS));
     }
     //add the current posting to the current postings list
     termPostings.setDocumentPointer(documentPointer);
@@ -791,7 +805,13 @@
    * @return
    */
   public double getClosingProgress(){
-    return closingProgress;
+    if(directIndexEnabled) {
+      double directIndexProgress = 
+          (directIndexBuilder == null) ? 0 : directIndexBuilder.getProgress();
+      return (closingProgress + directIndexProgress) / 2;
+    } else {
+      return closingProgress;  
+    }
   }
   
   /**
@@ -962,11 +982,19 @@
       // save the termMap
       generateTermMap(getGlobalFile(DiskBasedIndex.TERMS_EXTENSION), 
         getGlobalFile(DiskBasedIndex.TERMMAP_EXTENSION));
+      // closing completed
+      closingProgress = 1;
+      
+      if(directIndexEnabled) {
+        // also build the direct index
+        directIndexBuilder = new MimirDirectIndexBuilder(
+          indexConfig.getIndexDirectory(), indexBaseName);
+        directIndexBuilder.run();
+      }
     } catch(Exception e) {
       throw new IndexException("Exception while closing the index", e);
     }
     logger.info("Indexing completed for index " + indexBasename());
-    closingProgress = 1;
     closed = true;
   }
   

Modified: mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/TokenIndexBuilder.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/TokenIndexBuilder.java     2012-07-13 03:17:22 UTC (rev 15928)
+++ mimir/trunk/mimir-core/src/gate/mimir/index/mg4j/TokenIndexBuilder.java     2012-07-13 11:01:17 UTC (rev 15929)
@@ -102,7 +102,8 @@
           GATEDocumentFactory factory, boolean zipCollection,
           String baseName,
           TokenIndexerConfig config) {
-    super(inputQueue, outputQueue, indexer, baseName);
+    super(inputQueue, outputQueue, indexer, baseName, 
+        config.isDirectIndexEnabled());
     this.termProcessor = config.getTermProcessor();
     this.docMetadataHelpers = indexer.getIndexConfig().getDocMetadataHelpers();
     this.featureName = config.getFeatureName();

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to