mimir

valyt Wed, 18 Dec 2013 08:07:54 -0800

Revision: 17186
          http://sourceforge.net/p/gate/code/17186
Author:   valyt
Date:     2013-12-18 16:03:06 +0000 (Wed, 18 Dec 2013)
Log Message:
-----------
Starting to draft the new updateable index API.


Modified Paths:
--------------
    mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java

Added Paths:
-----------
    mimir/branches/5.0/mimir-core/src/gate/mimir/MimirIndex.java

Removed Paths:
-------------
    mimir/branches/5.0/mimir-core/src/gate/mimir/Index.java

Deleted: mimir/branches/5.0/mimir-core/src/gate/mimir/Index.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/Index.java     2013-12-18 
15:26:26 UTC (rev 17185)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/Index.java     2013-12-18 
16:03:06 UTC (rev 17186)
@@ -1,56 +0,0 @@
-/*
- *  Index.java
- *
- *  Copyright (c) 2007-2013, The University of Sheffield.
- *
- *  This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html), 
- *  and is free software, licenced under the GNU Lesser General Public License,
- *  Version 3, June 2007 (also included with this distribution as file
- *  LICENCE-LGPL3.html).
- *
- *  Valentin Tablan, 30 Oct 2013
- *
- *  $Id$
- */
-package gate.mimir;
-
-import java.io.File;
-
-/**
- * A Mímir index which can index document and answer queries. This class is the
- * main entry point to the Mímir API.
- * 
- * A Mímir index comprises the following data elements:
- * <ul>
- *   <li>A datastore used by the semantic annotation helpers</li>
- *   <li>one or more sub-indexes</li>
- *   <li>the zip collection containing the document textual content and 
- *   metadata</li>
- * </ul>
- */
-public class Index {
-  
-  /**
-   * The {@link IndexConfig} used for this index.
-   */
-  protected IndexConfig indexConfig;
-
-  /**
-   * Create a new Index.
-   * @param indexConfig the configuration for the index.
-   */
-  public Index(IndexConfig indexConfig) {
-    super();
-    this.indexConfig = indexConfig;
-  }
-  
-  /**
-   * Open and existing Index.
-   * @param indexDirectory the on-disk directory containing the index to be 
-   * opened.
-   */
-  public Index(File indexDirectory ) {
-    // TODO
-  }
-  
-}

Copied: mimir/branches/5.0/mimir-core/src/gate/mimir/MimirIndex.java (from rev 
17042, mimir/branches/5.0/mimir-core/src/gate/mimir/Index.java)
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/MimirIndex.java                
                (rev 0)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/MimirIndex.java        
2013-12-18 16:03:06 UTC (rev 17186)
@@ -0,0 +1,65 @@
+/*
+ *  MimirIndex.java
+ *
+ *  Copyright (c) 2007-2013, The University of Sheffield.
+ *
+ *  This file is part of GATE Mímir (see http://gate.ac.uk/family/mimir.html), 
+ *  and is free software, licenced under the GNU Lesser General Public License,
+ *  Version 3, June 2007 (also included with this distribution as file
+ *  LICENCE-LGPL3.html).
+ *
+ *  Valentin Tablan, 30 Oct 2013
+ *
+ *  $Id$
+ */
+package gate.mimir;
+
+import gate.mimir.index.AtomicIndex;
+import gate.mimir.index.Indexer;
+import gate.mimir.search.QueryEngine;
+
+import java.io.File;
+
+/**
+ * A Mímir index which can index documents and answer queries. This class is
+ * the main entry point to the Mímir API.
+ * 
+ * A Mímir index is a compound index comprising the following data elements:
+ * <ul>
+ *   <li>A datastore used by the semantic annotation helpers</li>
+ *   <li>one or more indirect sub-indexes</li>
+ *   <li>one or more direct sub-indexes (optional)</li>
+ *   <li>the zip collection containing the document textual content and 
+ *   metadata</li>
+ * </ul>
+ */
+public class MimirIndex {
+  
+  /**
+   * The {@link IndexConfig} used for this index.
+   */
+  protected IndexConfig indexConfig;
+
+  /**
+   * Create a new Index.
+   * @param indexConfig the configuration for the index.
+   */
+  public MimirIndex(IndexConfig indexConfig) {
+    super();
+    this.indexConfig = indexConfig;
+  }
+  
+  /**
+   * Open an existing Index.
+   * @param indexDirectory the on-disk directory containing the index to be 
+   * opened.
+   */
+  public MimirIndex(File indexDirectory ) {
+    // TODO
+  }
+  
+  protected AtomicIndex[] tokenIndexes;
+  
+  protected AtomicIndex[] mentionIndexes;
+  
+}

Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java
===================================================================
--- mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java 
2013-12-18 15:26:26 UTC (rev 17185)
+++ mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java 
2013-12-18 16:03:06 UTC (rev 17186)
@@ -14,35 +14,145 @@
  */
 package gate.mimir.index;
 
-import it.unimi.di.big.mg4j.index.IndexIterator;
+import gate.Document;
+import gate.mimir.MimirIndex;
 import it.unimi.di.big.mg4j.index.IndexReader;
 
+import java.io.File;
+
 /**
  * An indirect index associating terms with documents. Terms can be either token
- * feature values, or semantic annotation URIs.
+ * feature values, or semantic annotation URIs. Optionally, a direct index may 
+ * also be present.
+ * 
+ * An atomic index manages a head index (the principal data) and a set of tail 
+ * indexes (batches containing updates). Additionally, the data representing 
+ * all the new documents that have been queued for indexing since the last tail
+ * was written is stored in RAM.  
  */
-public interface AtomicIndex {
+public abstract class AtomicIndex {
 
   /**
-   * Adds a new posting (term - document association) to the index. 
-   * @param termId the term being indexed
-   * @param documentId the document in which the term occurs.
-   * @param position the position in the document where the term occurs. 
+   * The file name (under the current directory for this atomic index) which 
+   * stores the principal index. 
    */
-  public void addPosting(long termId, long documentId, int position);  
+  public static final String HEAD_FILE_NAME = "head";
   
   /**
-   * Searches the index.
-   * @param termId the term being sought
-   * @return an {@link IndexIterator} that can return the results of the 
search.
+   * The file extension used for the temporary directory where the updated head
+   * is being built.
    */
-  public IndexIterator search(long termId);
+  public static final String HEAD_NEW_EXT = ".new";
   
   /**
-   * Returns an {@link IndexReader} that can be used to search the index. The 
-   * returned index reader is not guaranteed to reflect any changes to the 
index
-   * that occur after the reader has been returned.
-   * @return an {@link IndexReader}
+   * The file extension used for the temporary directory where the old head 
+   * index is being stored while the newly updated one is being installed.
    */
-  public IndexReader getReader();
+  public static final String HEAD_OLD_EXT = ".old";
+  
+  /**
+   * The prefix used for file names (under the current directory for this 
+   * atomic index) for updates to the head index.
+   */
+  public static final String TAIL_FILE_NAME_PREFIX = "tail-";
+  
+  
+  
+  /**
+   * The file name (under the current directory for this atomic index) for the
+   * directory containing the documents that have been queued for indexing, but 
+   * not yet indexed. 
+   */
+  public static final String DOCUMENTs_QUEUE_FILE_NAME = "queued-documents";
+  
+  /**
+   * The name of this atomic index.
+   */
+  protected String name;
+  
+  protected File indexDirectory;
+  
+  /**
+   * The number of occurrences stored in this index.
+   */
+  protected long occurrences;
+  
+  /**
+   * The number of occurrences represented in RAM and not yet written to disk. 
+   */
+  protected long newOccurrences;
+  
+  /**
+   * The {@link MimirIndex} that this atomic index is a member of.
+   */
+  protected MimirIndex parent;
+  
+  protected boolean hasDirectIndex;
+  
+       /**
+        * Gets the name of this atomic index. This is used as the file name for the 
+        * directory storing the index files.
+        * @return
+        */
+       public String getName() {
+         return name;
+       }
+       
+       /**
+        * Is a direct index configured for this atomic index. 
+        * @return
+        */
+       public boolean hasDirectIndex(){
+         return hasDirectIndex;
+       }
+       
+       public void indexDocument(Document document) {
+         //TODO
+         
+         // write to documents queue
+         
+         // convert to index data in RAM
+         
+       }
+       
+       /**
+        * Writes all the data currently stored in RAM to a new tail index.
+        */
+       public void writeNewTail() {
+         //TODO 
+         // dump new tail
+         
+         // merge new tail into index cluster
+         
+         if(hasDirectIndex) {
+           // dump new direct tail (invert the tail just written)
+           // merge new direct tail into direct index cluster
+         }
+         
+         // clear queued-documents folder
+         
+         newOccurrences = 0;
+       }
+       
+       
+       /**
+        * Combines all the currently existing tails into the head, generating a new
+        * head index.
+        */
+       public void combineTails() {
+         // TODO
+         
+         // create new head directory
+         
+         // start combining the head, and each of the tails, writing out to the 
+         // new head dir.
+         
+         
+       }
+       
+       public IndexReader getIndexReader() {
+         // TODO
+         return null;
+       }
+       
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Rapidly troubleshoot problems before they affect your business. Most IT 
organizations don't have a clear picture of how application performance 
affects their revenue. With AppDynamics, you get 100% visibility into your 
Java,.NET, & PHP application. Start your 15-day FREE TRIAL of AppDynamics Pro!
http://pubads.g.doubleclick.net/gampad/clk?id=84349831&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

[gate-cvs] SF.net SVN: gate:[17186] mimir/branches/5.0/mimir-core/src/gate/mimir

Reply via email to