Revision: 20281
          http://sourceforge.net/p/gate/code/20281
Author:   ian_roberts
Date:     2017-12-05 00:35:07 +0000 (Tue, 05 Dec 2017)
Log Message:
-----------
Updated index upgrader to be able to cope with quasi-succinct indexes (which have a different set of files on disk compared to traditional FileHPIndexes)
Modified Paths: -------------- mimir/trunk/mimir-core/src/gate/mimir/util/IndexUpgrader.java Modified: mimir/trunk/mimir-core/src/gate/mimir/util/IndexUpgrader.java =================================================================== --- mimir/trunk/mimir-core/src/gate/mimir/util/IndexUpgrader.java 2017-12-05 00:34:15 UTC (rev 20280) +++ mimir/trunk/mimir-core/src/gate/mimir/util/IndexUpgrader.java 2017-12-05 00:35:07 UTC (rev 20281) @@ -24,7 +24,12 @@ import gate.mimir.index.IndexException; import it.unimi.di.big.mg4j.index.DiskBasedIndex; import it.unimi.di.big.mg4j.index.cluster.DocumentalCluster; +import it.unimi.di.big.mg4j.io.IOFactory; +import it.unimi.di.big.mg4j.io.IOFactories; +import it.unimi.dsi.util.Properties; + + import java.io.File; import java.io.FilenameFilter; import java.io.IOException; @@ -53,12 +58,39 @@ DiskBasedIndex.TERMS_EXTENSION, DiskBasedIndex.OFFSETS_EXTENSION }; + + /** + * A minimal set of files required for a valid quasi-succinct index. + */ + protected static final String[] REQUIRED_QS_INDEX_FILE_EXTENSIONS = new String[] { + DiskBasedIndex.COUNTS_EXTENSION, + DiskBasedIndex.COUNTS_EXTENSION + "offsets", + DiskBasedIndex.POINTERS_EXTENSIONS, + DiskBasedIndex.POINTERS_EXTENSIONS + "offsets", + DiskBasedIndex.POSITIONS_EXTENSION, + DiskBasedIndex.POSITIONS_EXTENSION + "offsets", + DiskBasedIndex.TERMS_EXTENSION, + }; + /** + * A minimal set of files required for a valid direct index. + */ protected static final String[] REQUIRED_DIRECT_INDEX_FILE_EXTENSIONS = new String[] { DiskBasedIndex.INDEX_EXTENSION, DiskBasedIndex.TERMS_EXTENSION, DiskBasedIndex.OFFSETS_EXTENSION }; + + /** + * A minimal set of files required for a valid quasi-succinct direct index. 
+ */ + protected static final String[] REQUIRED_QS_DIRECT_INDEX_FILE_EXTENSIONS = new String[] { + DiskBasedIndex.COUNTS_EXTENSION, + DiskBasedIndex.COUNTS_EXTENSION + "offsets", + DiskBasedIndex.POINTERS_EXTENSIONS, + DiskBasedIndex.POINTERS_EXTENSIONS + "offsets", + DiskBasedIndex.TERMS_EXTENSION, + }; public static void upgradeIndex(File indexDirectory) throws IOException, IndexException { @@ -196,11 +228,39 @@ AtomicIndex.DIRECT_INDEX_NAME_SUFFIX + ".")); } }); - Set<String> requiredExtensions = new HashSet<String>( - Arrays.asList(REQUIRED_INDEX_FILE_EXTENSIONS)); - Set<String> requiredDirectExtensions = new HashSet<String>( - Arrays.asList(REQUIRED_DIRECT_INDEX_FILE_EXTENSIONS)); + + Set<String> requiredExtensions = new HashSet<String>(); + Set<String> requiredDirectExtensions = new HashSet<String>(); + String indexPropertiesFile = new File(sourceDir, inputFilePrefix + DiskBasedIndex.PROPERTIES_EXTENSION).getAbsolutePath(); + try { + Properties indexProperties = IOFactories.loadProperties(IOFactory.FILESYSTEM_FACTORY, + indexPropertiesFile); + if("it.unimi.di.big.mg4j.index.QuasiSuccinctIndex".equals(indexProperties.getString("indexclass"))) { + requiredExtensions.addAll(Arrays.asList(REQUIRED_QS_INDEX_FILE_EXTENSIONS)); + } else { + requiredExtensions.addAll(Arrays.asList(REQUIRED_INDEX_FILE_EXTENSIONS)); + } + } catch(Exception e) { + throw new IndexException("Error reading " + indexPropertiesFile, e); + } + + if(direct) { + String directPropertiesFile = new File(sourceDir, inputFilePrefix + AtomicIndex.DIRECT_INDEX_NAME_SUFFIX + + DiskBasedIndex.PROPERTIES_EXTENSION).getAbsolutePath(); + try { + Properties directProperties = IOFactories.loadProperties(IOFactory.FILESYSTEM_FACTORY, + directPropertiesFile); + if("it.unimi.di.big.mg4j.index.QuasiSuccinctIndex".equals(directProperties.getString("indexclass"))) { + requiredDirectExtensions.addAll(Arrays.asList(REQUIRED_QS_DIRECT_INDEX_FILE_EXTENSIONS)); + } else { + 
requiredDirectExtensions.addAll(Arrays.asList(REQUIRED_DIRECT_INDEX_FILE_EXTENSIONS)); + } + } catch(Exception e) { + throw new IndexException("Error reading " + directPropertiesFile, e); + } + } + for(File aFile : atomicIndexFiles) { String extension = aFile.getName().substring(inputFilePrefix.length()); if(direct && extension.startsWith(AtomicIndex.DIRECT_INDEX_NAME_SUFFIX)) { This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs