Revision: 18412
          http://sourceforge.net/p/gate/code/18412
Author:   ian_roberts
Date:     2014-10-27 20:28:17 +0000 (Mon, 27 Oct 2014)
Log Message:
-----------
Merging deletion and clustering fixes from trunk, version number to 5.0.1
Modified Paths: -------------- mimir/branches/5.0/build.properties mimir/branches/5.0/doc/changes.tex mimir/branches/5.0/doc/mimir-guide.pdf mimir/branches/5.0/doc/mimir-version.tex mimir/branches/5.0/mimir-client/src/gate/mimir/index/MimirConnector.java mimir/branches/5.0/mimir-cloud/application.properties mimir/branches/5.0/mimir-core/build/ivy.xml mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java mimir/branches/5.0/mimir-core/src/gate/mimir/search/RankingQueryRunnerImpl.java Property Changed: ---------------- mimir/branches/5.0/ mimir/branches/5.0/doc/ mimir/branches/5.0/mimir-core/src/ Index: mimir/branches/5.0 =================================================================== --- mimir/branches/5.0 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0 2014-10-27 20:28:17 UTC (rev 18412) Property changes on: mimir/branches/5.0 ___________________________________________________________________ Modified: svn:mergeinfo ## -1,4 +1,4 ## /mimir/branches/3.4:14623,14634-14643,14687 /mimir/branches/4.0:15380-15383,15385-15386,15388 /mimir/branches/4.x:14298-14348 -/mimir/trunk:17278,18060,18064 +/mimir/trunk:17278,18060,18064,18183,18271,18409,18411 \ No newline at end of property Modified: mimir/branches/5.0/build.properties =================================================================== --- mimir/branches/5.0/build.properties 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/build.properties 2014-10-27 20:28:17 UTC (rev 18412) @@ -1,4 +1,4 @@ -app.version=5.0 +app.version=5.0.1 mimir-core.dirname=mimir-core mimir-client.dirname=mimir-client plugins.dirname=plugins Index: mimir/branches/5.0/doc =================================================================== --- mimir/branches/5.0/doc 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/doc 2014-10-27 20:28:17 UTC (rev 18412) Property changes on: mimir/branches/5.0/doc ___________________________________________________________________ Modified: svn:mergeinfo ## -2,4 +2,4 
## /mimir/branches/4.0/doc:15380-15383,15385-15386,15388 /mimir/branches/4.1/doc:15778-15818 /mimir/branches/4.x/doc:14298-14348 -/mimir/trunk/doc:18060,18064 +/mimir/trunk/doc:18060,18064,18411 \ No newline at end of property Modified: mimir/branches/5.0/doc/changes.tex =================================================================== --- mimir/branches/5.0/doc/changes.tex 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/doc/changes.tex 2014-10-27 20:28:17 UTC (rev 18412) @@ -1,6 +1,14 @@ This appendix details the main changes in each \Mimir\ release. +\section{Version 5.0.1 (October 2014)} +Two critical fixes: +\begin{itemize} + \item Deletion of documents now works correctly, it had been broken in + version 5.0 + \item Fixed clustering logic for multi-batch indexes. +\end{itemize} + \section{Version 5.0 (February 2014)} \begin{itemize} \item \Mimir{} indexes are now updateable: new documents can be submitted for Modified: mimir/branches/5.0/doc/mimir-guide.pdf =================================================================== (Binary files differ) Modified: mimir/branches/5.0/doc/mimir-version.tex =================================================================== --- mimir/branches/5.0/doc/mimir-version.tex 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/doc/mimir-version.tex 2014-10-27 20:28:17 UTC (rev 18412) @@ -8,4 +8,4 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\newcommand{\mimirversion}{5.0} \ No newline at end of file +\newcommand{\mimirversion}{5.0.1} \ No newline at end of file Modified: mimir/branches/5.0/mimir-client/src/gate/mimir/index/MimirConnector.java =================================================================== --- mimir/branches/5.0/mimir-client/src/gate/mimir/index/MimirConnector.java 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/mimir-client/src/gate/mimir/index/MimirConnector.java 2014-10-27 20:28:17 UTC (rev 18412) @@ -144,7 +144,7 @@ synchronized(this) { if(doc 
!= null){ - objectOutputStream.writeObject(doc); + objectOutputStream.writeUnshared(doc); } if(byteBuffer.size() > BYTE_BUFFER_SIZE) { writeBuffer(); // this will also empty (reset) the buffer Modified: mimir/branches/5.0/mimir-cloud/application.properties =================================================================== --- mimir/branches/5.0/mimir-cloud/application.properties 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/mimir-cloud/application.properties 2014-10-27 20:28:17 UTC (rev 18412) @@ -1,5 +1,5 @@ #Grails Metadata file -#Tue Jun 10 17:44:39 BST 2014 +#Mon Oct 27 20:23:42 GMT 2014 app.grails.version=2.2.3 app.name=mimir-cloud -app.version=5.0 +app.version=5.0.1 Modified: mimir/branches/5.0/mimir-core/build/ivy.xml =================================================================== --- mimir/branches/5.0/mimir-core/build/ivy.xml 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/mimir-core/build/ivy.xml 2014-10-27 20:28:17 UTC (rev 18412) @@ -3,7 +3,7 @@ <info organisation="uk.ac.gate" module="mimir-core" - revision="5.0"> + revision="5.0.1"> <description homepage="http://gate.ac.uk/mimir" /> </info> Index: mimir/branches/5.0/mimir-core/src =================================================================== --- mimir/branches/5.0/mimir-core/src 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/mimir-core/src 2014-10-27 20:28:17 UTC (rev 18412) Property changes on: mimir/branches/5.0/mimir-core/src ___________________________________________________________________ Modified: svn:mergeinfo ## -1,3 +1,4 ## /mimir/branches/3.4/mimir-core/src:14623,14634-14643 /mimir/branches/4.0/mimir-core/src:15380-15383,15385-15386,15388 /mimir/branches/4.x/mimir-core/src:14299-14316 +/mimir/trunk/mimir-core/src:18271,18409 \ No newline at end of property Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java =================================================================== --- 
mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/mimir-core/src/gate/mimir/index/AtomicIndex.java 2014-10-27 20:28:17 UTC (rev 18412) @@ -454,7 +454,10 @@ // prepare the documental cluster Index[] indexes = new Index[batches.size()]; - long[] cutPoints = new long[indexes.length]; + // cut points between the batches - there are numBatches+1 cutpoints, + // cutPoints[0] is always zero, and cutPoints[i] is the sum of the + // sizes of batches 0 to i-1 inclusive + long[] cutPoints = new long[indexes.length + 1]; cutPoints[0] = 0; int numberOfTerms = -1; int numberOfDocuments = -1; @@ -468,10 +471,8 @@ for(MG4JIndex aSubIndex : batches) { indexes[indexIdx] = aSubIndex.invertedIndex; - if(indexIdx < cutPoints.length - 1) { - cutPoints[indexIdx + 1] = cutPoints[indexIdx] + - aSubIndex.invertedIndex.numberOfDocuments; - } + cutPoints[indexIdx + 1] = cutPoints[indexIdx] + + aSubIndex.invertedIndex.numberOfDocuments; numberOfTerms += aSubIndex.invertedIndex.numberOfTerms; numberOfDocuments += aSubIndex.invertedIndex.numberOfDocuments; numberOfPostings += aSubIndex.invertedIndex.numberOfPostings; Modified: mimir/branches/5.0/mimir-core/src/gate/mimir/search/RankingQueryRunnerImpl.java =================================================================== --- mimir/branches/5.0/mimir-core/src/gate/mimir/search/RankingQueryRunnerImpl.java 2014-10-27 20:20:36 UTC (rev 18411) +++ mimir/branches/5.0/mimir-core/src/gate/mimir/search/RankingQueryRunnerImpl.java 2014-10-27 20:28:17 UTC (rev 18412) @@ -139,6 +139,9 @@ docIndex = (documentIndexes != null ? documentIndexes[(int)(i - start)] : i); docId = documentIds.getLong(docIndex); + // don't need to check for deletion here as we know for sure that this + // doc ID is ok. 
The only exception would be if it was deleted since + // this query was originally issued, but I think we can live with that long newDoc = queryExecutor.nextDocument(docId - 1); // sanity check if(newDoc == docId) { @@ -201,7 +204,7 @@ try{ // collect all documents and their scores if(ranking) scorer.wrap(queryExecutor); - long docId = ranking ? scorer.nextDocument(-1) : queryExecutor.nextDocument(-1); + long docId = nextNotDeleted(); while(docId >= 0) { // enlarge the hits list if(ranking){ @@ -223,7 +226,7 @@ } // and store the new doc ID documentIds.add(docId); - docId = ranking ? scorer.nextDocument(-1) : queryExecutor.nextDocument(-1); + docId = nextNotDeleted(); } allDocIdsCollected = true; if(ranking) { @@ -734,4 +737,19 @@ } } } + + /** + * Find the next document ID for the current query executor which is not + * marked as deleted in the index. + */ + protected long nextNotDeleted() throws IOException { + long docId = ranking ? scorer.nextDocument(-1) + : queryExecutor.nextDocument(-1); + while(docId >= 0 && queryEngine.getIndex().isDeleted(docId)) { + docId = ranking ? scorer.nextDocument(-1) + : queryExecutor.nextDocument(-1); + } + + return docId; + } } \ No newline at end of file This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs