http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListMerger.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListMerger.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListMerger.java index 81b6467..0bfc140 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListMerger.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListMerger.java @@ -20,281 +20,376 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.search; import java.nio.ByteBuffer; -import java.util.ArrayList; import java.util.Collections; +import java.util.List; -import org.apache.hyracks.api.context.IHyracksCommonContext; +import org.apache.hyracks.api.context.IHyracksTaskContext; +import org.apache.hyracks.api.exceptions.ErrorCode; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.primitive.IntegerPointable; import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference; +import org.apache.hyracks.dataflow.std.buffermanager.ISimpleFrameBufferManager; import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex; -import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor; +import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor; import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeFrameTupleAccessor; import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeTupleReference; import org.apache.hyracks.storage.common.MultiComparator; -// TODO: The merge procedure is rather confusing regarding cursor positions, hasNext() calls etc. -// Needs an overhaul some time. +/** + * Conducts the merging operation among all inverted list cursors and generates the final result. + */ public class InvertedListMerger { - protected final MultiComparator invListCmp; - protected SearchResult prevSearchResult; - protected SearchResult newSearchResult; + // current merge process type + public enum processType { + PREFIX_LIST, + SUFFIX_LIST_PROBE, + SUFFIX_LIST_SCAN, + NONE + } - public InvertedListMerger(IHyracksCommonContext ctx, IInvertedIndex invIndex) throws HyracksDataException { + protected final MultiComparator invListCmp; + protected InvertedIndexSearchResult prevSearchResult; + protected InvertedIndexSearchResult newSearchResult; + protected InvertedIndexFinalSearchResult finalSearchResult; + + // To Keep the status of this merge process since we only calculate one frame at a time in case of the final result + protected InvertedListCursor finalInvListCursor; + protected int occurrenceThreshold; + protected int numInvertedLists; + protected int invListIdx; + protected int numPrevResult; + protected int prevBufIdx; + protected int maxPrevBufIdx; + protected int numExpectedPages; + protected ByteBuffer prevCurrentBuffer; + protected FixedSizeFrameTupleAccessor resultFrameTupleAcc; + protected FixedSizeTupleReference resultTuple; + protected boolean advanceCursor; + protected boolean advancePrevResult; + protected int resultTidx; + protected int invListTidx; + protected int invListTupleCount; + protected ITupleReference invListTuple; + protected int prevResultFrameTupleCount; + + // Entire calculation done? + protected boolean isProcessingFinished; + // Dealing with the final list? + protected boolean isProcessingFinalList; + // Dealing with the final partition? + protected boolean isProcessingFinalPartition; + // Variable Initialization done? + protected boolean listVisited; + protected processType currentProcessType = processType.NONE; + + public InvertedListMerger(IHyracksTaskContext ctx, IInvertedIndex invIndex, ISimpleFrameBufferManager bufferManager) + throws HyracksDataException { this.invListCmp = MultiComparator.create(invIndex.getInvListCmpFactories()); - this.prevSearchResult = new SearchResult(invIndex.getInvListTypeTraits(), ctx); - this.newSearchResult = new SearchResult(prevSearchResult); + this.prevSearchResult = new InvertedIndexSearchResult(invIndex.getInvListTypeTraits(), ctx, bufferManager); + this.newSearchResult = new InvertedIndexSearchResult(invIndex.getInvListTypeTraits(), ctx, bufferManager); } - public void merge(ArrayList<IInvertedListCursor> invListCursors, int occurrenceThreshold, int numPrefixLists, - SearchResult searchResult) throws HyracksDataException { + /** + * Generates the result based on the given occurrenceThreshold. For the prefix lists, we do merge. + * For the suffix lists, we either conduct a binary search or a scan for each record ID. + * + * @return true only if all processing for the final list for a partition is done. + * false otherwise. + * @throws HyracksDataException + */ + public boolean merge(List<InvertedListCursor> invListCursors, int occurrenceThreshold, int numPrefixLists, + InvertedIndexFinalSearchResult finalSearchResult) throws HyracksDataException { Collections.sort(invListCursors); int numInvLists = invListCursors.size(); - SearchResult result = null; + InvertedIndexSearchResult result = null; + prevSearchResult.reset(); + newSearchResult.reset(); + boolean isFinalList = false; + // This will be only set to true when the processing the last list in a partition is done. + boolean doneMerge = false; + this.occurrenceThreshold = occurrenceThreshold; for (int i = 0; i < numInvLists; i++) { - SearchResult swapTemp = prevSearchResult; + // Sets the previous search result and the new (current) search result that will be written. + InvertedIndexSearchResult swapTemp = prevSearchResult; prevSearchResult = newSearchResult; newSearchResult = swapTemp; newSearchResult.reset(); if (i + 1 != numInvLists) { - // Use temporary search results when not merging last list. + // Use a temporary intermediate search result when not merging last list. result = newSearchResult; } else { - // When merging the last list, append results to the final search result. - result = searchResult; + // When merging the last list, appends results to the final search result. This is needed since + // this merge() can be called multiple times in case of partitioned T-occurrrence searches. + // So, we need to keep a separate search result. + result = finalSearchResult; + isFinalList = true; } - IInvertedListCursor invListCursor = invListCursors.get(i); - invListCursor.pinPages(); + InvertedListCursor invListCursor = invListCursors.get(i); + // Loads the inverted list (at least some part of it). + invListCursor.prepareLoadPages(); + invListCursor.loadPages(); if (i < numPrefixLists) { - // Merge prefix list. - mergePrefixList(invListCursor, prevSearchResult, result); + // Merges a prefix list. + doneMerge = mergePrefixList(invListCursor, prevSearchResult, result, isFinalList); } else { // Merge suffix list. int numInvListElements = invListCursor.size(); int currentNumResults = prevSearchResult.getNumResults(); // Should we binary search the next list or should we sort-merge it? if (currentNumResults * Math.log(numInvListElements) < currentNumResults + numInvListElements) { - mergeSuffixListProbe(invListCursor, prevSearchResult, result, i, numInvLists, occurrenceThreshold); + doneMerge = mergeSuffixListProbe(invListCursor, prevSearchResult, result, i, numInvLists, + occurrenceThreshold, isFinalList); } else { - mergeSuffixListScan(invListCursor, prevSearchResult, result, i, numInvLists, occurrenceThreshold); + doneMerge = mergeSuffixListScan(invListCursor, prevSearchResult, result, i, numInvLists, + occurrenceThreshold, isFinalList); + } + } + + if (isFinalList) { + // For the final list, the method unloadPages() should be called only when traversing + // the inverted list is finished. + if (doneMerge) { + invListCursor.unloadPages(); + invListCursor.close(); } + // Needs to return the calculation result for the final list only. + // Otherwise, the process needs to be continued until this method traverses the final inverted list + // and either generates some output in the output buffer or finishes traversing it. + return doneMerge; } - invListCursor.unpinPages(); + + invListCursor.unloadPages(); + invListCursor.close(); } - } - protected void mergeSuffixListProbe(IInvertedListCursor invListCursor, SearchResult prevSearchResult, - SearchResult newSearchResult, int invListIx, int numInvLists, int occurrenceThreshold) - throws HyracksDataException { + // Control does not reach here. + return true; + } - int prevBufIdx = 0; - int maxPrevBufIdx = prevSearchResult.getCurrentBufferIndex(); - ByteBuffer prevCurrentBuffer = prevSearchResult.getBuffers().get(0); + /** + * Continues the merge process on a final list if it has been paused because + * the output buffer of the final result was full. + * + * @return true only if all processing for the final list for a partition is done. + * false otherwise (still more to proceed). + */ + public boolean continueMerge() throws HyracksDataException { + boolean doneMerge; + switch (currentProcessType) { + case PREFIX_LIST: + doneMerge = + mergePrefixList(finalInvListCursor, prevSearchResult, finalSearchResult, isProcessingFinalList); + break; + case SUFFIX_LIST_PROBE: + doneMerge = mergeSuffixListProbe(finalInvListCursor, prevSearchResult, finalSearchResult, invListIdx, + numInvertedLists, occurrenceThreshold, isProcessingFinalList); + break; + case SUFFIX_LIST_SCAN: + doneMerge = mergeSuffixListScan(finalInvListCursor, prevSearchResult, finalSearchResult, invListIdx, + numInvertedLists, occurrenceThreshold, isProcessingFinalList); + break; + default: + throw HyracksDataException.create(ErrorCode.UNDEFINED_INVERTED_LIST_MERGE_TYPE); + } - FixedSizeFrameTupleAccessor resultFrameTupleAcc = prevSearchResult.getAccessor(); - FixedSizeTupleReference resultTuple = prevSearchResult.getTuple(); + if (doneMerge) { + // Final calculation is done. + finalInvListCursor.unloadPages(); + finalInvListCursor.close(); + } - int resultTidx = 0; + return doneMerge; + } - resultFrameTupleAcc.reset(prevCurrentBuffer); + /** + * Merges the given suffix list to the previous result using the given inverted list cursor. When traversing the + * inverted list cursor, a binary search will be conducted for each tuple in the previous search result. + * + * @return true only if all processing for the final list for a partition is done. + * false otherwise. + */ + protected boolean mergeSuffixListProbe(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult, + InvertedIndexSearchResult newSearchResult, int invListIx, int numInvLists, int occurrenceThreshold, + boolean isFinalList) throws HyracksDataException { + if (isProcessingFinished) { + return true; + } - while (resultTidx < resultFrameTupleAcc.getTupleCount()) { + initMergingOneList(invListCursor, prevSearchResult, newSearchResult, isFinalList, invListIx, numInvLists, + occurrenceThreshold, processType.SUFFIX_LIST_PROBE); + while (resultTidx < prevResultFrameTupleCount) { resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx)); - int count = IntegerPointable.getInteger(resultTuple.getFieldData(0), - resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)); - + int count = getCount(resultTuple); if (invListCursor.containsKey(resultTuple, invListCmp)) { + // Found the same tuple again on the current list. Increases the count by one. count++; - newSearchResult.append(resultTuple, count); - } else { - if (count + numInvLists - invListIx > occurrenceThreshold) { - newSearchResult.append(resultTuple, count); + if (!newSearchResult.append(resultTuple, count)) { + // For a final result, needs to pause when a frame becomes full to let the caller consume the frame. + // SearchResult.append() should only return false for this case. + return false; } - } - - resultTidx++; - if (resultTidx >= resultFrameTupleAcc.getTupleCount()) { - prevBufIdx++; - if (prevBufIdx <= maxPrevBufIdx) { - prevCurrentBuffer = prevSearchResult.getBuffers().get(prevBufIdx); - resultFrameTupleAcc.reset(prevCurrentBuffer); - resultTidx = 0; + } else if (count + numInvLists - invListIx > occurrenceThreshold) { + // This tuple only exists on the previous result. However, it can be a part of the answer + // if it will be found again in the remaining lists. + if (!newSearchResult.append(resultTuple, count)) { + // For a final result, needs to pause when a frame becomes full to let the caller consume the frame. + // SearchResult.append() should only return false for this case. + return false; } } + resultTidx++; + checkPrevResultAndFetchNextFrame(prevSearchResult); } - } - - protected void mergeSuffixListScan(IInvertedListCursor invListCursor, SearchResult prevSearchResult, - SearchResult newSearchResult, int invListIx, int numInvLists, int occurrenceThreshold) - throws HyracksDataException { - - int prevBufIdx = 0; - int maxPrevBufIdx = prevSearchResult.getCurrentBufferIndex(); - ByteBuffer prevCurrentBuffer = prevSearchResult.getBuffers().get(0); - - FixedSizeFrameTupleAccessor resultFrameTupleAcc = prevSearchResult.getAccessor(); - FixedSizeTupleReference resultTuple = prevSearchResult.getTuple(); - boolean advanceCursor = true; - boolean advancePrevResult = false; - int resultTidx = 0; - - resultFrameTupleAcc.reset(prevCurrentBuffer); - - int invListTidx = 0; - int invListNumTuples = invListCursor.size(); + return finishMergingOneList(isFinalList, prevSearchResult, newSearchResult, invListCursor); + } - if (invListCursor.hasNext()) { - invListCursor.next(); + /** + * Merges the given suffix list to the previous result using the given inverted list cursor. When traversing the + * inverted list cursor, a scan will be conducted like the mergePrefixList() method. + * + * @return true only if all processing for the final list for a partition is done. + * false otherwise. + */ + protected boolean mergeSuffixListScan(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult, + InvertedIndexSearchResult newSearchResult, int invListIx, int numInvLists, int occurrenceThreshold, + boolean isFinalList) throws HyracksDataException { + if (isProcessingFinished) { + return true; } - while (invListTidx < invListNumTuples && resultTidx < resultFrameTupleAcc.getTupleCount()) { - - ITupleReference invListTuple = invListCursor.getTuple(); + // When generating the final result, we need to initialize the variable only once. + initMergingOneList(invListCursor, prevSearchResult, newSearchResult, isFinalList, invListIx, numInvLists, + occurrenceThreshold, processType.SUFFIX_LIST_SCAN); + int cmp; + int count; + while (invListTidx < invListTupleCount && resultTidx < prevResultFrameTupleCount) { + invListTuple = invListCursor.getTuple(); resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx)); - - int cmp = invListCmp.compare(invListTuple, resultTuple); + cmp = invListCmp.compare(invListTuple, resultTuple); if (cmp == 0) { - int count = IntegerPointable.getInteger(resultTuple.getFieldData(0), - resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)) + 1; - newSearchResult.append(resultTuple, count); + // Found the same tuple again on the current list. Increases the count by one. + // Also, both the result and the cursor advances. + count = getCount(resultTuple) + 1; + if (!newSearchResult.append(resultTuple, count)) { + // For a final result, needs to pause when a frame becomes full to let the caller consume the frame. + // SearchResult.append() should only return false for this case. + return false; + } advanceCursor = true; advancePrevResult = true; + } else if (cmp < 0) { + // Found a new tuple on the current list. Based on prefix/suffix algorithm, + // this tuple can be ignored since it can't be a part of the answer. + advanceCursor = true; + advancePrevResult = false; } else { - if (cmp < 0) { - advanceCursor = true; - advancePrevResult = false; - } else { - int count = IntegerPointable.getInteger(resultTuple.getFieldData(0), - resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)); - if (count + numInvLists - invListIx > occurrenceThreshold) { - newSearchResult.append(resultTuple, count); - } - advanceCursor = false; - advancePrevResult = true; - } - } - - if (advancePrevResult) { - resultTidx++; - if (resultTidx >= resultFrameTupleAcc.getTupleCount()) { - prevBufIdx++; - if (prevBufIdx <= maxPrevBufIdx) { - prevCurrentBuffer = prevSearchResult.getBuffers().get(prevBufIdx); - resultFrameTupleAcc.reset(prevCurrentBuffer); - resultTidx = 0; + // This tuple only exists on the previous result. However, it can be a part of the answer + // if it will be found again in the remaining lists. + count = getCount(resultTuple); + if (count + numInvLists - invListIx > occurrenceThreshold) { + if (!newSearchResult.append(resultTuple, count)) { + // For a final result, needs to pause when a frame becomes full to let the caller + // consume the frame. SearchResult.append() should only return false for this case. + return false; } } + advanceCursor = false; + advancePrevResult = true; } - - if (advanceCursor) { - invListTidx++; - if (invListCursor.hasNext()) { - invListCursor.next(); - } - } + // Gets the next tuple from the previous result and the list cursor. + advancePrevResultAndList(advancePrevResult, advanceCursor, prevSearchResult, invListCursor); } // append remaining elements from previous result set - while (resultTidx < resultFrameTupleAcc.getTupleCount()) { - + // These remaining elements can be a part of the answer if they will be found again in the remaining lists. + while (resultTidx < prevResultFrameTupleCount) { resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx)); - - int count = IntegerPointable.getInteger(resultTuple.getFieldData(0), - resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)); + count = getCount(resultTuple); if (count + numInvLists - invListIx > occurrenceThreshold) { - newSearchResult.append(resultTuple, count); - } - - resultTidx++; - if (resultTidx >= resultFrameTupleAcc.getTupleCount()) { - prevBufIdx++; - if (prevBufIdx <= maxPrevBufIdx) { - prevCurrentBuffer = prevSearchResult.getBuffers().get(prevBufIdx); - resultFrameTupleAcc.reset(prevCurrentBuffer); - resultTidx = 0; + if (!newSearchResult.append(resultTuple, count)) { + // For a final result, needs to pause when a frame becomes full to let the caller + // consume the frame. SearchResult.append() should only return false for this case. + return false; } } + resultTidx++; + checkPrevResultAndFetchNextFrame(prevSearchResult); } - } - - protected void mergePrefixList(IInvertedListCursor invListCursor, SearchResult prevSearchResult, - SearchResult newSearchResult) throws HyracksDataException { - - int prevBufIdx = 0; - int maxPrevBufIdx = prevSearchResult.getCurrentBufferIndex(); - ByteBuffer prevCurrentBuffer = prevSearchResult.getBuffers().get(0); - - FixedSizeFrameTupleAccessor resultFrameTupleAcc = prevSearchResult.getAccessor(); - FixedSizeTupleReference resultTuple = prevSearchResult.getTuple(); - boolean advanceCursor = true; - boolean advancePrevResult = false; - int resultTidx = 0; - - resultFrameTupleAcc.reset(prevCurrentBuffer); - - int invListTidx = 0; - int invListNumTuples = invListCursor.size(); + return finishMergingOneList(isFinalList, prevSearchResult, newSearchResult, invListCursor); + } - if (invListCursor.hasNext()) { - invListCursor.next(); + /** + * Merges the prefix lists one by one. It reads the previous search result and one inverted list, + * then generates a new result by applying UNIONALL operation on these two. This method returns true + * only if all processing for the given final list is done. Otherwise, it returns false. + */ + protected boolean mergePrefixList(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult, + InvertedIndexSearchResult newSearchResult, boolean isFinalList) throws HyracksDataException { + if (isProcessingFinished) { + return true; } - while (invListTidx < invListNumTuples && resultTidx < resultFrameTupleAcc.getTupleCount()) { + // Assigns necessary variables and fetches a tuple from the inverted list cursor. + initMergingOneList(invListCursor, prevSearchResult, newSearchResult, isFinalList, processType.PREFIX_LIST); - ITupleReference invListTuple = invListCursor.getTuple(); + int cmp; + int count; + // Traverses the inverted list and the previous result at the same time. + while (invListTidx < invListTupleCount && resultTidx < prevResultFrameTupleCount) { + invListTuple = invListCursor.getTuple(); resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx)); - - int cmp = invListCmp.compare(invListTuple, resultTuple); + cmp = invListCmp.compare(invListTuple, resultTuple); + // Found the same tuple again on the current list: count + 1. Both the result and the cursor advances. if (cmp == 0) { - int count = IntegerPointable.getInteger(resultTuple.getFieldData(0), - resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)) + 1; - newSearchResult.append(resultTuple, count); + count = getCount(resultTuple) + 1; + if (!newSearchResult.append(resultTuple, count)) { + // For a final result, needs to pause when a frame becomes full to let the caller + // consume the frame. SearchResult.append() should only return false for this case. + return false; + } advanceCursor = true; advancePrevResult = true; - } else { - if (cmp < 0) { - int count = 1; - newSearchResult.append(invListTuple, count); - advanceCursor = true; - advancePrevResult = false; - } else { - int count = IntegerPointable.getInteger(resultTuple.getFieldData(0), - resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)); - newSearchResult.append(resultTuple, count); - advanceCursor = false; - advancePrevResult = true; + } else if (cmp < 0) { + // Found a new tuple on the current list. Sets its count to 1. Only advances the cursor. + count = 1; + if (!newSearchResult.append(invListTuple, count)) { + // For a final result, needs to pause when a frame becomes full to let the caller + // consume the frame. SearchResult.append() should only return false for this case. + return false; } - } - - if (advancePrevResult) { - resultTidx++; - if (resultTidx >= resultFrameTupleAcc.getTupleCount()) { - prevBufIdx++; - if (prevBufIdx <= maxPrevBufIdx) { - prevCurrentBuffer = prevSearchResult.getBuffers().get(prevBufIdx); - resultFrameTupleAcc.reset(prevCurrentBuffer); - resultTidx = 0; - } - } - } - - if (advanceCursor) { - invListTidx++; - if (invListCursor.hasNext()) { - invListCursor.next(); + advanceCursor = true; + advancePrevResult = false; + // + } else { + // This tuple only exists on the previous result. Maintains its count. Only advances the result. + count = getCount(resultTuple); + if (!newSearchResult.append(resultTuple, count)) { + // For a final result, needs to pause when a frame becomes full to let the caller + // consume the frame. SearchResult.append() should only return false for this case. + return false; } + advanceCursor = false; + advancePrevResult = true; + // } + // Gets the next tuple from the previous result and the list cursor. + advancePrevResultAndList(advancePrevResult, advanceCursor, prevSearchResult, invListCursor); } // append remaining new elements from inverted list - while (invListTidx < invListNumTuples) { - ITupleReference invListTuple = invListCursor.getTuple(); - newSearchResult.append(invListTuple, 1); + // + while (invListTidx < invListTupleCount) { + invListTuple = invListCursor.getTuple(); + if (!newSearchResult.append(invListTuple, 1)) { + // For a final result, needs to pause when a frame becomes full to let the caller + // consume the frame. SearchResult.append() should only return false for this case. + return false; + } invListTidx++; if (invListCursor.hasNext()) { invListCursor.next(); @@ -302,32 +397,188 @@ public class InvertedListMerger { } // append remaining elements from previous result set - while (resultTidx < resultFrameTupleAcc.getTupleCount()) { - + while (resultTidx < prevResultFrameTupleCount) { resultTuple.reset(prevCurrentBuffer.array(), resultFrameTupleAcc.getTupleStartOffset(resultTidx)); + count = getCount(resultTuple); + if (!newSearchResult.append(resultTuple, count)) { + // For a final result, needs to pause when a frame becomes full to let the caller + // consume the frame. SearchResult.append() should only return false for this case. + return false; + } + resultTidx++; + checkPrevResultAndFetchNextFrame(prevSearchResult); + } + + return finishMergingOneList(isFinalList, prevSearchResult, newSearchResult, invListCursor); + } + + /** + * Initializes necessary information for each merging operation (prefix_list) for a list. + */ + protected void initMergingOneList(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult, + InvertedIndexSearchResult newSearchResult, boolean isFinalList, processType mergeOpType) + throws HyracksDataException { + initMergingOneList(invListCursor, prevSearchResult, newSearchResult, isFinalList, 0, 0, 0, mergeOpType); + } + + /** + * Initializes necessary information for each merging operation (suffix_list_probe or suffix_list_scan) for a list. + */ + protected void initMergingOneList(InvertedListCursor invListCursor, InvertedIndexSearchResult prevSearchResult, + InvertedIndexSearchResult newSearchResult, boolean isFinalList, int invListIx, int numInvLists, + int occurrenceThreshold, processType mergeOpType) throws HyracksDataException { + // Each inverted list will be visited only once except the final inverted list. + // When generating the final result, the given inverted list can be visited multiple times + // since we only generate one frame at a time. So, we need to initialize the following variables only once. + if (!listVisited) { + resetVariable(); + prevBufIdx = 0; + maxPrevBufIdx = prevSearchResult.getCurrentBufferIndex(); + // Gets the maximum possible number of expected result pages (in case no common element at all). + numPrevResult = prevSearchResult.getNumResults(); + invListTupleCount = invListCursor.size(); + numExpectedPages = newSearchResult.getExpectedNumPages(numPrevResult + invListTupleCount); + newSearchResult.prepareWrite(numExpectedPages); + prevSearchResult.prepareResultRead(); + prevCurrentBuffer = prevSearchResult.getNextFrame(); + resultFrameTupleAcc = prevSearchResult.getAccessor(); + resultTuple = prevSearchResult.getTuple(); + advanceCursor = true; + advancePrevResult = false; + resultTidx = 0; + resultFrameTupleAcc.reset(prevCurrentBuffer); + invListTidx = 0; + numInvertedLists = numInvLists; + invListIdx = invListIx; + prevResultFrameTupleCount = prevCurrentBuffer == null ? 0 : resultFrameTupleAcc.getTupleCount(); + + // Additional variables to keep the current status of the given merge process + if (isFinalList) { + finalInvListCursor = invListCursor; + finalSearchResult = (InvertedIndexFinalSearchResult) newSearchResult; + currentProcessType = mergeOpType; + this.occurrenceThreshold = occurrenceThreshold; + isProcessingFinalList = true; + listVisited = true; + } + + if (invListCursor.hasNext()) { + invListCursor.next(); + } + } + } - int count = IntegerPointable.getInteger(resultTuple.getFieldData(0), - resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)); - newSearchResult.append(resultTuple, count); + /** + * Finishes the merging operation of one list. + * + * @return true only if this merging operation is for a final list + * false otherwise + */ + protected boolean finishMergingOneList(boolean isFinalList, InvertedIndexSearchResult prevSearchResult, + InvertedIndexSearchResult newSearchResult, InvertedListCursor invListCursor) throws HyracksDataException { + prevSearchResult.closeResultRead(false); + invListCursor.close(); + // Final search result can be called multiple times for partitioned occurrence searcher case + // so that it can be written multiple times. So, should not finalize the write here. + // The caller of merge() should ensure that. + if (!isFinalList) { + newSearchResult.finalizeWrite(); + return false; + } else { + // Final list? then, the calculation is done. + isProcessingFinished = true; + return true; + } + } + /** + * Fetches the next previous result frame if the current frame has been consumed. + * Also fetches next element from the inverted list cursor. + */ + protected void advancePrevResultAndList(boolean advancePrevResult, boolean advanceCursor, + InvertedIndexSearchResult prevSearchResult, InvertedListCursor invListCursor) throws HyracksDataException { + if (advancePrevResult) { resultTidx++; - if (resultTidx >= resultFrameTupleAcc.getTupleCount()) { - prevBufIdx++; - if (prevBufIdx <= maxPrevBufIdx) { - prevCurrentBuffer = prevSearchResult.getBuffers().get(prevBufIdx); - resultFrameTupleAcc.reset(prevCurrentBuffer); - resultTidx = 0; - } + checkPrevResultAndFetchNextFrame(prevSearchResult); + } + + if (advanceCursor) { + invListTidx++; + if (invListCursor.hasNext()) { + invListCursor.next(); } } } - public SearchResult createSearchResult() throws HyracksDataException { - return new SearchResult(prevSearchResult); + /** + * Fetches the next page of the previous result if possible. + */ + protected void checkPrevResultAndFetchNextFrame(InvertedIndexSearchResult prevSearchResult) + throws HyracksDataException { + if (resultTidx >= prevResultFrameTupleCount) { + prevBufIdx++; + if (prevBufIdx <= maxPrevBufIdx) { + prevCurrentBuffer = prevSearchResult.getNextFrame(); + resultFrameTupleAcc.reset(prevCurrentBuffer); + prevResultFrameTupleCount = resultFrameTupleAcc.getTupleCount(); + resultTidx = 0; + } + } } - public void reset() { - prevSearchResult.clear(); - newSearchResult.clear(); + /** + * Gets the count of the given tuple in the previous search result. + */ + protected int getCount(FixedSizeTupleReference resultTuple) { + return IntegerPointable.getInteger(resultTuple.getFieldData(0), + resultTuple.getFieldStart(resultTuple.getFieldCount() - 1)); } + + public void reset() throws HyracksDataException { + prevSearchResult.reset(); + newSearchResult.reset(); + resetVariable(); + } + + /** + * Prepares the merge process. This mainly allocates buffers for the two intermediate search results. + */ + public void prepareMerge() throws HyracksDataException { + prevSearchResult.prepareIOBuffer(); + newSearchResult.prepareIOBuffer(); + resetVariable(); + } + + /** + * Cleans every stuff. + */ + public void close() throws HyracksDataException { + prevSearchResult.close(); + newSearchResult.close(); + } + + // Resets the variables. + private void resetVariable() { + prevBufIdx = 0; + maxPrevBufIdx = 0; + numPrevResult = 0; + invListTupleCount = 0; + numExpectedPages = 0; + prevCurrentBuffer = null; + resultFrameTupleAcc = null; + resultTuple = null; + advanceCursor = false; + advancePrevResult = false; + resultTidx = 0; + invListTidx = 0; + prevResultFrameTupleCount = 0; + finalInvListCursor = null; + finalSearchResult = null; + currentProcessType = processType.NONE; + isProcessingFinalList = false; + isProcessingFinished = false; + listVisited = false; + occurrenceThreshold = 0; + } + }
http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java index e9d6c1f..fef4baf 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/InvertedListPartitions.java @@ -22,24 +22,29 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.search; import java.util.ArrayList; import java.util.Arrays; -import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor; +import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.storage.am.lsm.invertedindex.api.IObjectFactory; +import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor; import org.apache.hyracks.storage.am.lsm.invertedindex.util.ObjectCache; +/** + * This class contains multiple inverted list partitions. Each partition can contain inverted list cursors in it. + * + */ public class InvertedListPartitions { private final int DEFAULT_NUM_PARTITIONS = 10; private final int PARTITIONS_SLACK_SIZE = 10; private final int OBJECT_CACHE_INIT_SIZE = 10; private final int OBJECT_CACHE_EXPAND_SIZE = 10; - private final IObjectFactory<ArrayList<IInvertedListCursor>> arrayListFactory; - private final ObjectCache<ArrayList<IInvertedListCursor>> arrayListCache; - private ArrayList<IInvertedListCursor>[] partitions; + private final IObjectFactory<ArrayList<InvertedListCursor>> arrayListFactory; + private final ObjectCache<ArrayList<InvertedListCursor>> arrayListCache; + private ArrayList<InvertedListCursor>[] partitions; private short minValidPartitionIndex; private short maxValidPartitionIndex; - public InvertedListPartitions() { - this.arrayListFactory = new ArrayListFactory<IInvertedListCursor>(); - this.arrayListCache = new ObjectCache<ArrayList<IInvertedListCursor>>(arrayListFactory, OBJECT_CACHE_INIT_SIZE, + public InvertedListPartitions() throws HyracksDataException { + this.arrayListFactory = new ArrayListFactory<InvertedListCursor>(); + this.arrayListCache = new ObjectCache<ArrayList<InvertedListCursor>>(arrayListFactory, OBJECT_CACHE_INIT_SIZE, OBJECT_CACHE_EXPAND_SIZE); } @@ -52,7 +57,7 @@ public class InvertedListPartitions { } else { initialSize = numTokensUpperBound + 1; } - partitions = (ArrayList<IInvertedListCursor>[]) new ArrayList[initialSize]; + partitions = (ArrayList<InvertedListCursor>[]) new ArrayList[initialSize]; } else { if (numTokensUpperBound + 1 >= partitions.length) { partitions = Arrays.copyOf(partitions, numTokensUpperBound + 1); @@ -64,11 +69,11 @@ public class InvertedListPartitions { maxValidPartitionIndex = Short.MIN_VALUE; } - public void addInvertedListCursor(IInvertedListCursor listCursor, short numTokens) { + public void addInvertedListCursor(InvertedListCursor listCursor, short numTokens) throws HyracksDataException { if (numTokens + 1 >= partitions.length) { partitions = Arrays.copyOf(partitions, numTokens + PARTITIONS_SLACK_SIZE); } - ArrayList<IInvertedListCursor> partitionCursors = partitions[numTokens]; + ArrayList<InvertedListCursor> partitionCursors = partitions[numTokens]; if (partitionCursors == null) { partitionCursors = arrayListCache.getNext(); partitionCursors.clear(); @@ -84,7 +89,7 @@ public class InvertedListPartitions { partitionCursors.add(listCursor); } - public ArrayList<IInvertedListCursor>[] getPartitions() { + public ArrayList<InvertedListCursor>[] getPartitions() { return partitions; } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java index 5ae4e05..3d8c35a 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/PartitionedTOccurrenceSearcher.java @@ -20,9 +20,9 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.search; import java.io.IOException; -import java.util.ArrayList; +import java.util.List; -import org.apache.hyracks.api.context.IHyracksCommonContext; +import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.exceptions.ErrorCode; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.data.std.primitive.ShortPointable; @@ -33,10 +33,13 @@ import org.apache.hyracks.storage.am.common.api.IIndexOperationContext; import org.apache.hyracks.storage.am.common.tuples.ConcatenatingTupleReference; import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInPlaceInvertedIndex; import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier; -import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor; import org.apache.hyracks.storage.am.lsm.invertedindex.api.IPartitionedInvertedIndex; -import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexSearchCursor; +import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor; +import org.apache.hyracks.storage.common.IIndexCursor; +/** + * Conducts T-Occurrence searches on inverted lists in one or more partition. + */ public class PartitionedTOccurrenceSearcher extends AbstractTOccurrenceSearcher { protected final ArrayTupleBuilder lowerBoundTupleBuilder = new ArrayTupleBuilder(1); @@ -45,17 +48,26 @@ public class PartitionedTOccurrenceSearcher extends AbstractTOccurrenceSearcher protected final ArrayTupleReference upperBoundTuple = new ArrayTupleReference(); protected final ConcatenatingTupleReference fullLowSearchKey = new ConcatenatingTupleReference(2); protected final ConcatenatingTupleReference fullHighSearchKey = new ConcatenatingTupleReference(2); - - // Inverted list cursors ordered by token. Used to read relevant inverted-list partitions of one token one after - // the other for better I/O performance (because the partitions of one inverted list are stored contiguously in a file). - // The above implies that we currently require holding all inverted list for a query in memory. - protected final ArrayList<IInvertedListCursor> cursorsOrderedByTokens = new ArrayList<>(); protected final InvertedListPartitions partitions = new InvertedListPartitions(); - public PartitionedTOccurrenceSearcher(IHyracksCommonContext ctx, IInPlaceInvertedIndex invIndex) + // To keep the current state of this search + protected int curPartIdx; + protected int endPartIdx; + protected int numPrefixLists; + protected boolean isFinalPartIdx; + protected boolean needToReadNewPart; + List<InvertedListCursor>[] partitionCursors; + IInvertedIndexSearchModifier searchModifier; + + public PartitionedTOccurrenceSearcher(IInPlaceInvertedIndex invIndex, IHyracksTaskContext ctx) throws HyracksDataException { - super(ctx, invIndex); + super(invIndex, ctx); initHelperTuples(); + curPartIdx = 0; + endPartIdx = 0; + isFinalPartIdx = false; + isFinishedSearch = false; + needToReadNewPart = true; } private void initHelperTuples() { @@ -87,17 +99,20 @@ public class PartitionedTOccurrenceSearcher extends AbstractTOccurrenceSearcher } @Override - public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred, - IIndexOperationContext ictx) throws HyracksDataException { + public void search(IIndexCursor resultCursor, InvertedIndexSearchPredicate searchPred, IIndexOperationContext ictx) + throws HyracksDataException { + prepareSearch(); IPartitionedInvertedIndex partInvIndex = (IPartitionedInvertedIndex) invIndex; - searchResult.reset(); + finalSearchResult.reset(); if (partInvIndex.isEmpty()) { + isFinishedSearch = true; resultCursor.open(null, searchPred); return; } tokenizeQuery(searchPred); short numQueryTokens = (short) queryTokenAppender.getTupleCount(); - IInvertedIndexSearchModifier searchModifier = searchPred.getSearchModifier(); + + searchModifier = searchPred.getSearchModifier(); short numTokensLowerBound = searchModifier.getNumTokensLowerBound(numQueryTokens); short numTokensUpperBound = searchModifier.getNumTokensUpperBound(numQueryTokens); occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens); @@ -108,61 +123,195 @@ public class PartitionedTOccurrenceSearcher extends AbstractTOccurrenceSearcher short maxCountPossible = numQueryTokens; invListCursorCache.reset(); partitions.reset(numTokensLowerBound, numTokensUpperBound); - cursorsOrderedByTokens.clear(); for (int i = 0; i < numQueryTokens; i++) { searchKey.reset(queryTokenAppender, i); if (!partInvIndex.openInvertedListPartitionCursors(this, ictx, numTokensLowerBound, numTokensUpperBound, - partitions, cursorsOrderedByTokens)) { + partitions)) { maxCountPossible--; // No results possible. if (maxCountPossible < occurrenceThreshold) { + // Closes all opened cursors. + closeCursorsInPartitions(partitions); + isFinishedSearch = true; resultCursor.open(null, searchPred); return; } } } - ArrayList<IInvertedListCursor>[] partitionCursors = partitions.getPartitions(); + + partitionCursors = partitions.getPartitions(); short start = partitions.getMinValidPartitionIndex(); short end = partitions.getMaxValidPartitionIndex(); - // Typically, we only enter this case for disk-based inverted indexes. - // TODO: This behavior could potentially lead to a deadlock if we cannot pin - // all inverted lists in memory, and are forced to wait for a page to get evicted - // (other concurrent searchers may be in the same situation). - // We should detect such cases, then unpin all pages, and then keep retrying to pin until we succeed. - // This will require a different "tryPin()" mechanism in the BufferCache that will return false - // if we'd have to wait for a page to get evicted. - if (!cursorsOrderedByTokens.isEmpty()) { - for (int i = start; i <= end; i++) { + + // Process the partitions one-by-one. + endPartIdx = end; + for (int i = start; i <= end; i++) { + // Prune partition because no element in it can satisfy the occurrence threshold. + if (partitionCursors[i] == null) { + continue; + } + // Prune partition because no element in it can satisfy the occurrence threshold. + // An opened cursor should be closed. + if (partitionCursors[i].size() < occurrenceThreshold) { + for (InvertedListCursor cursor : partitionCursors[i]) { + cursor.close(); + } + continue; + } + // Merge inverted lists of current partition. + numPrefixLists = searchModifier.getNumPrefixLists(occurrenceThreshold, partitionCursors[i].size()); + invListMerger.reset(); + curPartIdx = i; + isFinalPartIdx = i == end ? true : false; + + // If the number of inverted list cursor is one, then we don't need to go through the merge process + // for this partition. + if (partitionCursors[i].size() == 1) { + singleInvListCursor = partitionCursors[i].get(0); + singleInvListCursor.prepareLoadPages(); + singleInvListCursor.loadPages(); + isSingleInvertedList = true; + needToReadNewPart = true; + } else { + singleInvListCursor = null; + isSingleInvertedList = false; + needToReadNewPart = invListMerger.merge(partitionCursors[i], occurrenceThreshold, numPrefixLists, + finalSearchResult); + searchResultBuffer = finalSearchResult.getNextFrame(); + searchResultTupleIndex = 0; + searchResultFta.reset(searchResultBuffer); + } + + // By now, some output was generated by the merger or only one cursor in this partition is associated. + // So, we open the cursor that will fetch these result. If it's the final partition, the outside of + // this for loop will handle opening of the result cursor for a single inverted list cursor case. + if (needToReadNewPart && isFinalPartIdx) { + invListMerger.close(); + finalSearchResult.finalizeWrite(); + isFinishedSearch = true; + } + resultCursor.open(null, searchPred); + return; + } + + // The control reaches here if the above loop doesn't have any valid cursor. + isFinishedSearch = true; + needToReadNewPart = true; + resultCursor.open(null, searchPred); + return; + } + + /** + * Continues a search process in case of the following two cases: + * #1. If it was paused because the output buffer of the final result was full. + * #2. All tuples from a single inverted list has been read. + * + * @return true only if all processing for the final list for the final partition is done. + * false otherwise. + * @throws HyracksDataException + */ + @Override + public boolean continueSearch() throws HyracksDataException { + if (isFinishedSearch) { + return true; + } + + // Case #1 only - output buffer was full + if (!needToReadNewPart) { + needToReadNewPart = invListMerger.continueMerge(); + searchResultBuffer = finalSearchResult.getNextFrame(); + searchResultTupleIndex = 0; + searchResultFta.reset(searchResultBuffer); + // Final calculation done? + if (needToReadNewPart && isFinalPartIdx) { + isFinishedSearch = true; + invListMerger.close(); + finalSearchResult.finalizeWrite(); + return true; + } + return false; + } + + // Finished one partition for the both cases #1 and #2. So, moves to the next partition. + curPartIdx++; + if (curPartIdx <= endPartIdx) { + boolean suitablePartFound = false; + for (int i = curPartIdx; i <= endPartIdx; i++) { + // Prune partition because no element in it can satisfy the occurrence threshold. if (partitionCursors[i] == null) { continue; } // Prune partition because no element in it can satisfy the occurrence threshold. + // An opened cursor should be closed. if (partitionCursors[i].size() < occurrenceThreshold) { - cursorsOrderedByTokens.removeAll(partitionCursors[i]); + for (InvertedListCursor cursor : partitionCursors[i]) { + cursor.close(); + } + continue; } + suitablePartFound = true; + curPartIdx = i; + break; + } + + // If no partition is availble to explore, we stop here. + if (!suitablePartFound) { + isFinishedSearch = true; + invListMerger.close(); + finalSearchResult.finalizeWrite(); + return true; } - // Pin all the cursors in the order of tokens. - int numCursors = cursorsOrderedByTokens.size(); - for (int i = 0; i < numCursors; i++) { - cursorsOrderedByTokens.get(i).pinPages(); + + // Merge inverted lists of current partition. + numPrefixLists = searchModifier.getNumPrefixLists(occurrenceThreshold, partitionCursors[curPartIdx].size()); + invListMerger.reset(); + finalSearchResult.resetBuffer(); + isFinalPartIdx = curPartIdx == endPartIdx ? true : false; + + // If the number of inverted list cursor is one, then we don't need to go through the merge process. + if (partitionCursors[curPartIdx].size() == 1) { + singleInvListCursor = partitionCursors[curPartIdx].get(0); + singleInvListCursor.prepareLoadPages(); + singleInvListCursor.loadPages(); + isSingleInvertedList = true; + needToReadNewPart = true; + } else { + singleInvListCursor = null; + isSingleInvertedList = false; + needToReadNewPart = invListMerger.merge(partitionCursors[curPartIdx], occurrenceThreshold, + numPrefixLists, finalSearchResult); + searchResultBuffer = finalSearchResult.getNextFrame(); + searchResultTupleIndex = 0; + searchResultFta.reset(searchResultBuffer); } + + // Finished processing one partition + if (needToReadNewPart && isFinalPartIdx) { + invListMerger.close(); + finalSearchResult.finalizeWrite(); + isFinishedSearch = true; + return true; + } + + } else { + isFinishedSearch = true; } - // Process the partitions one-by-one. + return false; + } + + private void closeCursorsInPartitions(InvertedListPartitions parts) throws HyracksDataException { + List<InvertedListCursor>[] partCursors = parts.getPartitions(); + short start = parts.getMinValidPartitionIndex(); + short end = parts.getMaxValidPartitionIndex(); for (int i = start; i <= end; i++) { - if (partitionCursors[i] == null) { + if (partCursors[i] == null) { continue; } - // Prune partition because no element in it can satisfy the occurrence threshold. - if (partitionCursors[i].size() < occurrenceThreshold) { - continue; + for (InvertedListCursor cursor : partCursors[i]) { + cursor.close(); } - // Merge inverted lists of current partition. - int numPrefixLists = searchModifier.getNumPrefixLists(occurrenceThreshold, partitionCursors[i].size()); - invListMerger.reset(); - invListMerger.merge(partitionCursors[i], occurrenceThreshold, numPrefixLists, searchResult); } - resultCursor.open(null, searchPred); } public void setNumTokensBoundsInSearchKeys(short numTokensLowerBound, short numTokensUpperBound) { @@ -182,7 +331,8 @@ public class PartitionedTOccurrenceSearcher extends AbstractTOccurrenceSearcher return fullHighSearchKey; } - public IInvertedListCursor getCachedInvertedListCursor() { + public InvertedListCursor getCachedInvertedListCursor() throws HyracksDataException { return invListCursorCache.getNext(); } + } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/SearchResult.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/SearchResult.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/SearchResult.java deleted file mode 100644 index 2da1434..0000000 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/SearchResult.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hyracks.storage.am.lsm.invertedindex.search; - -import java.nio.ByteBuffer; -import java.util.ArrayList; - -import org.apache.hyracks.api.context.IHyracksCommonContext; -import org.apache.hyracks.api.dataflow.value.ITypeTraits; -import org.apache.hyracks.api.exceptions.HyracksDataException; -import org.apache.hyracks.data.std.primitive.IntegerPointable; -import org.apache.hyracks.dataflow.common.data.accessors.ITupleReference; -import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeFrameTupleAccessor; -import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeFrameTupleAppender; -import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeTupleReference; - -/** - * Byte-buffer backed storage for intermediate and final results of inverted-index searches. - */ -// TODO: Rename members. -public class SearchResult { - protected final ArrayList<ByteBuffer> buffers = new ArrayList<ByteBuffer>(); - protected final IHyracksCommonContext ctx; - protected final FixedSizeFrameTupleAppender appender; - protected final FixedSizeFrameTupleAccessor accessor; - protected final FixedSizeTupleReference tuple; - protected final ITypeTraits[] typeTraits; - protected final int invListElementSize; - - protected int currBufIdx; - protected int numResults; - - public SearchResult(ITypeTraits[] invListFields, IHyracksCommonContext ctx) throws HyracksDataException { - typeTraits = new ITypeTraits[invListFields.length + 1]; - int tmp = 0; - for (int i = 0; i < invListFields.length; i++) { - typeTraits[i] = invListFields[i]; - tmp += invListFields[i].getFixedLength(); - } - invListElementSize = tmp; - // Integer for counting occurrences. - typeTraits[invListFields.length] = IntegerPointable.TYPE_TRAITS; - this.ctx = ctx; - appender = new FixedSizeFrameTupleAppender(ctx.getInitialFrameSize(), typeTraits); - accessor = new FixedSizeFrameTupleAccessor(ctx.getInitialFrameSize(), typeTraits); - tuple = new FixedSizeTupleReference(typeTraits); - buffers.add(ctx.allocateFrame()); - } - - /** - * Initialize from other search-result object to share member instances except for result buffers. - * - * @throws HyracksDataException - */ - public SearchResult(SearchResult other) throws HyracksDataException { - this.ctx = other.ctx; - this.appender = other.appender; - this.accessor = other.accessor; - this.tuple = other.tuple; - this.typeTraits = other.typeTraits; - this.invListElementSize = other.invListElementSize; - buffers.add(ctx.allocateFrame()); - } - - public FixedSizeFrameTupleAccessor getAccessor() { - return accessor; - } - - public FixedSizeFrameTupleAppender getAppender() { - return appender; - } - - public FixedSizeTupleReference getTuple() { - return tuple; - } - - public ArrayList<ByteBuffer> getBuffers() { - return buffers; - } - - public void reset() { - currBufIdx = 0; - numResults = 0; - appender.reset(buffers.get(0), true); - } - - public void clear() { - currBufIdx = 0; - numResults = 0; - for (ByteBuffer buffer : buffers) { - appender.reset(buffer, true); - } - } - - public void append(ITupleReference invListElement, int count) throws HyracksDataException { - ByteBuffer currentBuffer = buffers.get(currBufIdx); - if (!appender.hasSpace()) { - currBufIdx++; - if (currBufIdx >= buffers.size()) { - buffers.add(ctx.allocateFrame()); - } - currentBuffer = buffers.get(currBufIdx); - appender.reset(currentBuffer, true); - } - // Append inverted-list element. - if (!appender.append(invListElement.getFieldData(0), invListElement.getFieldStart(0), invListElementSize)) { - throw new IllegalStateException(); - } - // Append count. - if (!appender.append(count)) { - throw new IllegalStateException(); - } - appender.incrementTupleCount(1); - numResults++; - } - - public int getCurrentBufferIndex() { - return currBufIdx; - } - - public ITypeTraits[] getTypeTraits() { - return typeTraits; - } - - public int getNumResults() { - return numResults; - } - -} http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java index 4c9f037..9808ae1 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/search/TOccurrenceSearcher.java @@ -21,26 +21,30 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.search; import java.util.ArrayList; -import org.apache.hyracks.api.context.IHyracksCommonContext; +import org.apache.hyracks.api.context.IHyracksTaskContext; import org.apache.hyracks.api.exceptions.ErrorCode; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.storage.am.common.api.IIndexOperationContext; import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInPlaceInvertedIndex; import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier; -import org.apache.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor; -import org.apache.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexSearchCursor; +import org.apache.hyracks.storage.am.lsm.invertedindex.api.InvertedListCursor; +import org.apache.hyracks.storage.common.IIndexCursor; +/** + * Conducts T-Occurrence searches on inverted lists. + */ public class TOccurrenceSearcher extends AbstractTOccurrenceSearcher { - protected final ArrayList<IInvertedListCursor> invListCursors = new ArrayList<>(); + protected final ArrayList<InvertedListCursor> invListCursors = new ArrayList<>(); - public TOccurrenceSearcher(IHyracksCommonContext ctx, IInPlaceInvertedIndex invIndex) throws HyracksDataException { - super(ctx, invIndex); + public TOccurrenceSearcher(IInPlaceInvertedIndex invIndex, IHyracksTaskContext ctx) throws HyracksDataException { + super(invIndex, ctx); } @Override - public void search(OnDiskInvertedIndexSearchCursor resultCursor, InvertedIndexSearchPredicate searchPred, - IIndexOperationContext ictx) throws HyracksDataException { + public void search(IIndexCursor resultCursor, InvertedIndexSearchPredicate searchPred, IIndexOperationContext ictx) + throws HyracksDataException { + prepareSearch(); tokenizeQuery(searchPred); int numQueryTokens = queryTokenAppender.getTupleCount(); @@ -48,7 +52,7 @@ public class TOccurrenceSearcher extends AbstractTOccurrenceSearcher { invListCursorCache.reset(); for (int i = 0; i < numQueryTokens; i++) { searchKey.reset(queryTokenAppender, i); - IInvertedListCursor invListCursor = invListCursorCache.getNext(); + InvertedListCursor invListCursor = invListCursorCache.getNext(); invIndex.openInvertedListCursor(invListCursor, searchKey, ictx); invListCursors.add(invListCursor); } @@ -60,8 +64,54 @@ public class TOccurrenceSearcher extends AbstractTOccurrenceSearcher { } int numPrefixLists = searchModifier.getNumPrefixLists(occurrenceThreshold, invListCursors.size()); - searchResult.reset(); - invListMerger.merge(invListCursors, occurrenceThreshold, numPrefixLists, searchResult); + // For a single inverted list case, we don't need to call merge() method since elements from a single inverted + // list cursor will be the final answer. + if (numQueryTokens == 1 && occurrenceThreshold == 1) { + singleInvListCursor = invListCursors.get(0); + singleInvListCursor.prepareLoadPages(); + singleInvListCursor.loadPages(); + isSingleInvertedList = true; + isFinishedSearch = true; + } else { + finalSearchResult.reset(); + isFinishedSearch = + invListMerger.merge(invListCursors, occurrenceThreshold, numPrefixLists, finalSearchResult); + searchResultBuffer = finalSearchResult.getNextFrame(); + searchResultTupleIndex = 0; + searchResultFta.reset(searchResultBuffer); + } + + if (isFinishedSearch) { + invListMerger.close(); + finalSearchResult.finalizeWrite(); + } + // Some or all output was generated by the merger. Let the result cursor fetch the output. resultCursor.open(null, searchPred); } + + /** + * Continues a search process if it was paused because the output buffer (one frame) of the final result was full. + * This method should not be called for a single inverted list case since there cannot be multiple inverted list + * cursors for a single keyword. + * + * @return true only if all processing for the final list is done. + * false otherwise. + * @throws HyracksDataException + */ + @Override + public boolean continueSearch() throws HyracksDataException { + if (isFinishedSearch) { + return true; + } + isFinishedSearch = invListMerger.continueMerge(); + searchResultBuffer = finalSearchResult.getNextFrame(); + searchResultTupleIndex = 0; + searchResultFta.reset(searchResultBuffer); + if (isFinishedSearch) { + invListMerger.close(); + finalSearchResult.finalizeWrite(); + } + return isFinishedSearch; + } + } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/ObjectCache.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/ObjectCache.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/ObjectCache.java index 01c74c9..ccfe1b8 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/ObjectCache.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/util/ObjectCache.java @@ -21,6 +21,7 @@ package org.apache.hyracks.storage.am.lsm.invertedindex.util; import java.util.ArrayList; +import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.storage.am.lsm.invertedindex.api.IObjectFactory; public class ObjectCache<T> { @@ -29,14 +30,14 @@ public class ObjectCache<T> { protected final ArrayList<T> cache; protected int lastReturned = 0; - public ObjectCache(IObjectFactory<T> objFactory, int initialSize, int expandSize) { + public ObjectCache(IObjectFactory<T> objFactory, int initialSize, int expandSize) throws HyracksDataException { this.objFactory = objFactory; this.cache = new ArrayList<T>(initialSize); this.expandSize = expandSize; expand(initialSize); } - private void expand(int expandSize) { + private void expand(int expandSize) throws HyracksDataException { for (int i = 0; i < expandSize; i++) { cache.add(objFactory.create()); } @@ -46,7 +47,7 @@ public class ObjectCache<T> { lastReturned = 0; } - public T getNext() { + public T getNext() throws HyracksDataException { if (lastReturned >= cache.size()) { expand(expandSize); } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java index ee37043..0dc25ec 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java @@ -51,9 +51,8 @@ import org.apache.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences import org.apache.hyracks.storage.am.lsm.common.impls.LSMComponentFilterManager; import org.apache.hyracks.storage.am.rtree.frames.RTreeFrameFactory; import org.apache.hyracks.storage.am.rtree.impls.RTree; +import org.apache.hyracks.storage.common.IIndexAccessParameters; import org.apache.hyracks.storage.common.IIndexCursor; -import org.apache.hyracks.storage.common.IModificationOperationCallback; -import org.apache.hyracks.storage.common.ISearchOperationCallback; import org.apache.hyracks.storage.common.ISearchPredicate; import org.apache.hyracks.storage.common.buffercache.IBufferCache; import org.apache.hyracks.util.trace.ITracer; @@ -229,11 +228,10 @@ public abstract class AbstractLSMRTree extends AbstractLSMIndex implements ITree } @Override - protected LSMRTreeOpContext createOpContext(IModificationOperationCallback modCallback, - ISearchOperationCallback searchCallback) { + protected LSMRTreeOpContext createOpContext(IIndexAccessParameters iap) { return new LSMRTreeOpContext(this, memoryComponents, rtreeLeafFrameFactory, rtreeInteriorFrameFactory, - btreeLeafFrameFactory, modCallback, searchCallback, getTreeFields(), getFilterFields(), getHarness(), - comparatorFields, linearizerArray, getFilterCmpFactories(), tracer); + btreeLeafFrameFactory, iap.getModificationCallback(), iap.getSearchOperationCallback(), getTreeFields(), + getFilterFields(), getHarness(), comparatorFields, linearizerArray, getFilterCmpFactories(), tracer); } @Override http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java index d891d9e..7c1467c 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java @@ -374,8 +374,7 @@ public class LSMRTree extends AbstractLSMRTree { @Override public ILSMIndexAccessor createAccessor(IIndexAccessParameters iap) { - return new LSMRTreeAccessor(getHarness(), - createOpContext(iap.getModificationCallback(), iap.getSearchOperationCallback()), buddyBTreeFields); + return new LSMRTreeAccessor(getHarness(), createOpContext(iap), buddyBTreeFields); } // This function is modified for R-Trees without antimatter tuples to allow buddy B-Tree to have only primary keys http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java index cade80f..3bb94ed 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/org/apache/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java @@ -260,7 +260,7 @@ public class LSMRTreeWithAntiMatterTuples extends AbstractLSMRTree { @Override public ILSMIndexAccessor createAccessor(IIndexAccessParameters iap) { - LSMRTreeOpContext opCtx = createOpContext(iap.getModificationCallback(), iap.getSearchOperationCallback()); + LSMRTreeOpContext opCtx = createOpContext(iap); return new LSMTreeIndexAccessor(getHarness(), opCtx, cursorFactory); } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorNodePushable.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorNodePushable.java b/hyracks-fullstack/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorNodePushable.java index 886285c..18ced6d 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorNodePushable.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-am-rtree/src/main/java/org/apache/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorNodePushable.java @@ -30,6 +30,7 @@ import org.apache.hyracks.storage.am.common.dataflow.IndexSearchOperatorNodePush import org.apache.hyracks.storage.am.common.tuples.PermutingFrameTupleReference; import org.apache.hyracks.storage.am.rtree.impls.SearchPredicate; import org.apache.hyracks.storage.am.rtree.util.RTreeUtils; +import org.apache.hyracks.storage.common.IIndexAccessParameters; import org.apache.hyracks.storage.common.ISearchPredicate; import org.apache.hyracks.storage.common.MultiComparator; @@ -88,4 +89,10 @@ public class RTreeSearchOperatorNodePushable extends IndexSearchOperatorNodePush protected int getFieldCount() { return ((ITreeIndex) index).getFieldCount(); } + + @Override + protected void addAdditionalIndexAccessorParams(IIndexAccessParameters iap) throws HyracksDataException { + // no additional parameteres are required for the B+Tree search case yet + } + } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/IIndexAccessor.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/IIndexAccessor.java b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/IIndexAccessor.java index 382aea2..0cb1b29 100644 --- a/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/IIndexAccessor.java +++ b/hyracks-fullstack/hyracks/hyracks-storage-common/src/main/java/org/apache/hyracks/storage/common/IIndexAccessor.java @@ -83,8 +83,10 @@ public interface IIndexAccessor extends IDestroyable { /** * Creates a cursor appropriate for passing into search(). * + * @throws HyracksDataException + * */ - IIndexCursor createSearchCursor(boolean exclusive); + IIndexCursor createSearchCursor(boolean exclusive) throws HyracksDataException; /** * Open the given cursor for an index search using the given predicate as http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/AbstractIndexTestWorker.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/AbstractIndexTestWorker.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/AbstractIndexTestWorker.java index b9123a5..830bab5 100644 --- a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/AbstractIndexTestWorker.java +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/AbstractIndexTestWorker.java @@ -37,7 +37,7 @@ public abstract class AbstractIndexTestWorker extends Thread implements ITreeInd private final TestOperationSelector opSelector; private final int numBatches; - protected final IIndexAccessor indexAccessor; + protected IIndexAccessor indexAccessor; public AbstractIndexTestWorker(DataGenThread dataGen, TestOperationSelector opSelector, IIndex index, int numBatches) throws HyracksDataException { http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/IndexTestContext.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/IndexTestContext.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/IndexTestContext.java index 2c08ba0..effeafc 100644 --- a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/IndexTestContext.java +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/common/IndexTestContext.java @@ -35,7 +35,7 @@ public abstract class IndexTestContext<T extends CheckTuple> implements IIndexTe protected final IIndex index; protected final ArrayTupleBuilder tupleBuilder; protected final ArrayTupleReference tuple = new ArrayTupleReference(); - protected final IIndexAccessor indexAccessor; + protected IIndexAccessor indexAccessor; public IndexTestContext(ISerializerDeserializer[] fieldSerdes, IIndex index, boolean filtered) throws HyracksDataException { http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/config/AccessMethodTestsConfig.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/config/AccessMethodTestsConfig.java b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/config/AccessMethodTestsConfig.java index 60e10ad..1e462f8 100644 --- a/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/config/AccessMethodTestsConfig.java +++ b/hyracks-fullstack/hyracks/hyracks-test-support/src/main/java/org/apache/hyracks/storage/am/config/AccessMethodTestsConfig.java @@ -85,6 +85,9 @@ public class AccessMethodTestsConfig { public static final int LSM_INVINDEX_HYRACKS_FRAME_SIZE = 32768; public static final double LSM_INVINDEX_BLOOMFILTER_FALSE_POSITIVE_RATE = 0.01; public static final int LSM_INVINDEX_NUM_MUTABLE_COMPONENTS = 2; + // inverted-index-search frame limit for the test purposes (minimum 5) + // (1 for query token frame, 2 for intermediate results, 1 for the final result, 1 for inverted lists) + public static final int LSM_INVINDEX_SEARCH_FRAME_LIMIT = 5; // Test parameters. public static final int LSM_INVINDEX_NUM_DOCS_TO_INSERT = 100; http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreePointSearchCursorTest.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreePointSearchCursorTest.java b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreePointSearchCursorTest.java index 18d89ca..b16daf5 100644 --- a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreePointSearchCursorTest.java +++ b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreePointSearchCursorTest.java @@ -113,7 +113,7 @@ public class LSMBTreePointSearchCursorTest extends IIndexCursorTest { @Override protected IIndexCursor createCursor(IIndexAccessor accessor) { - opCtx = lsmBtree.createOpContext(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE); + opCtx = lsmBtree.createOpContext(NoOpIndexAccessParameters.INSTANCE); return new LSMBTreePointSearchCursor(opCtx); } http://git-wip-us.apache.org/repos/asf/asterixdb/blob/afe0d3d9/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreeRangeSearchCursorTest.java ---------------------------------------------------------------------- diff --git a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreeRangeSearchCursorTest.java b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreeRangeSearchCursorTest.java index e436596..e6850aa 100644 --- a/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreeRangeSearchCursorTest.java +++ b/hyracks-fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/org/apache/hyracks/storage/am/lsm/btree/cursor/LSMBTreeRangeSearchCursorTest.java @@ -94,7 +94,7 @@ public class LSMBTreeRangeSearchCursorTest extends IIndexCursorTest { @Override protected IIndexCursor createCursor(IIndexAccessor accessor) { - opCtx = lsmBtree.createOpContext(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE); + opCtx = lsmBtree.createOpContext(NoOpIndexAccessParameters.INSTANCE); return new LSMBTreeRangeSearchCursor(opCtx); }