Revision: 20281
          http://sourceforge.net/p/gate/code/20281
Author:   ian_roberts
Date:     2017-12-05 00:35:07 +0000 (Tue, 05 Dec 2017)
Log Message:
-----------
Updated index upgrader to be able to cope with quasi-succinct indexes (which 
have a different set of files on disk compared to traditional FileHPIndexes)

Modified Paths:
--------------
    mimir/trunk/mimir-core/src/gate/mimir/util/IndexUpgrader.java

Modified: mimir/trunk/mimir-core/src/gate/mimir/util/IndexUpgrader.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/util/IndexUpgrader.java	2017-12-05 00:34:15 UTC (rev 20280)
+++ mimir/trunk/mimir-core/src/gate/mimir/util/IndexUpgrader.java	2017-12-05 00:35:07 UTC (rev 20281)
@@ -24,7 +24,12 @@
 import gate.mimir.index.IndexException;
 import it.unimi.di.big.mg4j.index.DiskBasedIndex;
 import it.unimi.di.big.mg4j.index.cluster.DocumentalCluster;
+import it.unimi.di.big.mg4j.io.IOFactory;
+import it.unimi.di.big.mg4j.io.IOFactories;
 
+import it.unimi.dsi.util.Properties;
+
+
 import java.io.File;
 import java.io.FilenameFilter;
 import java.io.IOException;
@@ -53,12 +58,39 @@
     DiskBasedIndex.TERMS_EXTENSION,
     DiskBasedIndex.OFFSETS_EXTENSION
   };
+
+  /**
+   * A minimal set of files required for a valid quasi-succinct index.
+   */
+  protected static final String[] REQUIRED_QS_INDEX_FILE_EXTENSIONS = new String[] {
+    DiskBasedIndex.COUNTS_EXTENSION,
+    DiskBasedIndex.COUNTS_EXTENSION + "offsets",
+    DiskBasedIndex.POINTERS_EXTENSIONS,
+    DiskBasedIndex.POINTERS_EXTENSIONS + "offsets",
+    DiskBasedIndex.POSITIONS_EXTENSION,
+    DiskBasedIndex.POSITIONS_EXTENSION + "offsets",
+    DiskBasedIndex.TERMS_EXTENSION,
+  };
    
+  /**
+   * A minimal set of files required for a valid direct index.
+   */
   protected static final String[] REQUIRED_DIRECT_INDEX_FILE_EXTENSIONS = new String[] {
     DiskBasedIndex.INDEX_EXTENSION,
     DiskBasedIndex.TERMS_EXTENSION,
     DiskBasedIndex.OFFSETS_EXTENSION
   };
+
+  /**
+   * A minimal set of files required for a valid quasi-succinct direct index.
+   */
+  protected static final String[] REQUIRED_QS_DIRECT_INDEX_FILE_EXTENSIONS = new String[] {
+    DiskBasedIndex.COUNTS_EXTENSION,
+    DiskBasedIndex.COUNTS_EXTENSION + "offsets",
+    DiskBasedIndex.POINTERS_EXTENSIONS,
+    DiskBasedIndex.POINTERS_EXTENSIONS + "offsets",
+    DiskBasedIndex.TERMS_EXTENSION,
+  };
   
   public static void upgradeIndex(File indexDirectory) throws IOException, 
       IndexException {
@@ -196,11 +228,39 @@
              AtomicIndex.DIRECT_INDEX_NAME_SUFFIX + "."));
       }
     });
-    Set<String> requiredExtensions = new HashSet<String>(
-        Arrays.asList(REQUIRED_INDEX_FILE_EXTENSIONS));
-    Set<String> requiredDirectExtensions = new HashSet<String>(
-        Arrays.asList(REQUIRED_DIRECT_INDEX_FILE_EXTENSIONS));
+
+    Set<String> requiredExtensions = new HashSet<String>();
+    Set<String> requiredDirectExtensions = new HashSet<String>();
     
+    String indexPropertiesFile = new File(sourceDir, inputFilePrefix + DiskBasedIndex.PROPERTIES_EXTENSION).getAbsolutePath();
+    try {
+      Properties indexProperties = IOFactories.loadProperties(IOFactory.FILESYSTEM_FACTORY,
+          indexPropertiesFile);
+      if("it.unimi.di.big.mg4j.index.QuasiSuccinctIndex".equals(indexProperties.getString("indexclass"))) {
+        requiredExtensions.addAll(Arrays.asList(REQUIRED_QS_INDEX_FILE_EXTENSIONS));
+      } else {
+        requiredExtensions.addAll(Arrays.asList(REQUIRED_INDEX_FILE_EXTENSIONS));
+      }
+    } catch(Exception e) {
+      throw new IndexException("Error reading " + indexPropertiesFile, e);
+    }
+
+    if(direct) {
+      String directPropertiesFile = new File(sourceDir, inputFilePrefix + AtomicIndex.DIRECT_INDEX_NAME_SUFFIX
+          + DiskBasedIndex.PROPERTIES_EXTENSION).getAbsolutePath();
+      try {
+        Properties directProperties = IOFactories.loadProperties(IOFactory.FILESYSTEM_FACTORY,
+            directPropertiesFile);
+        if("it.unimi.di.big.mg4j.index.QuasiSuccinctIndex".equals(directProperties.getString("indexclass"))) {
+          requiredDirectExtensions.addAll(Arrays.asList(REQUIRED_QS_DIRECT_INDEX_FILE_EXTENSIONS));
+        } else {
+          requiredDirectExtensions.addAll(Arrays.asList(REQUIRED_DIRECT_INDEX_FILE_EXTENSIONS));
+        }
+      } catch(Exception e) {
+        throw new IndexException("Error reading " + directPropertiesFile, e);
+      }
+    }
+
     for(File aFile : atomicIndexFiles) {
       String extension = aFile.getName().substring(inputFilePrefix.length());
       if(direct && extension.startsWith(AtomicIndex.DIRECT_INDEX_NAME_SUFFIX)) {

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to