Revision: 20327
          http://sourceforge.net/p/gate/code/20327
Author:   ian_roberts
Date:     2019-05-30 14:01:23 +0000 (Thu, 30 May 2019)

Log Message:
-----------
Back-porting https://github.com/GateNLP/mimir/commit/5214f32689bd8ff1ebef126dbd870bbf4ed98e50 and https://github.com/GateNLP/mimir/commit/58a682eb7f870d05fb2286fa10c36edb9528a3b5 from Mimir version 6.1-SNAPSHOT: in cases where the index has more than just the "head" batch, the total number of documents reported by the index was off by one, and the direct index lexical cluster was ignoring the last tail-N batch
Modified Paths:
--------------
    mimir/trunk/mimir-core/src/gate/mimir/index/AtomicIndex.java

Modified: mimir/trunk/mimir-core/src/gate/mimir/index/AtomicIndex.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/index/AtomicIndex.java 2019-05-30 09:31:31 UTC (rev 20326)
+++ mimir/trunk/mimir-core/src/gate/mimir/index/AtomicIndex.java 2019-05-30 14:01:23 UTC (rev 20327)
@@ -489,10 +489,10 @@
         new ContiguousDocumentalStrategy(cutPoints),
         false, // flat = all component indexes have the same term list
         bloomFilters, // Bloom Filters
-        numberOfDocuments,
-        numberOfTerms,
-        numberOfPostings,
-        numberOfOccurences,
+        numberOfDocuments == -1 ? -1 : numberOfDocuments + 1,
+        numberOfTerms == -1 ? -1 : numberOfTerms + 1,
+        numberOfPostings == -1 ? -1 : numberOfPostings + 1,
+        numberOfOccurences == -1 ? -1 : numberOfOccurences + 1,
         maxCount,
         null, // payload
         true, // hasCounts
@@ -517,9 +517,9 @@
 
     // prepare the lexical cluster
     Index[] indexes = new Index[batches.size()];
-    int[] cutPoints = new int[indexes.length];
+    int[] cutPoints = new int[indexes.length + 1];
     cutPoints[0] = 0;
-    String[] cutPointTerms = new String[indexes.length];
+    String[] cutPointTerms = new String[indexes.length + 1];
     cutPointTerms[0] = longToTerm(0);
     int numberOfTerms = -1;
     int numberOfDocuments = -1;
@@ -554,10 +554,10 @@
     return new LexicalCluster(indexes,
         new ContiguousLexicalStrategy(cutPoints, cutPointTerms),
         bloomFilters, // Bloom Filters
-        numberOfDocuments,
-        numberOfTerms,
-        numberOfPostings,
-        numberOfOccurences,
+        numberOfDocuments == -1 ? -1 : numberOfDocuments + 1,
+        numberOfTerms == -1 ? -1 : numberOfTerms + 1,
+        numberOfPostings == -1 ? -1 : numberOfPostings + 1,
+        numberOfOccurences == -1 ? -1 : numberOfOccurences + 1,
         maxCount,
         null, // payload
         true, // hasCounts

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs