Github user sounakr commented on a diff in the pull request: https://github.com/apache/carbondata/pull/1079#discussion_r125199254 --- Diff: core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java --- @@ -18,56 +18,152 @@ import java.io.IOException; import java.util.BitSet; +import java.util.Comparator; import org.apache.carbondata.core.datastore.block.SegmentProperties; import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk; +import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk; import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk; +import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk; +import org.apache.carbondata.core.metadata.datatype.DataType; import org.apache.carbondata.core.scan.filter.FilterUtil; +import org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil; import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo; +import org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo; import org.apache.carbondata.core.scan.processor.BlocksChunkHolder; import org.apache.carbondata.core.util.BitSetGroup; import org.apache.carbondata.core.util.CarbonUtil; +import org.apache.carbondata.core.util.DataTypeUtil; public class ExcludeFilterExecuterImpl implements FilterExecuter { protected DimColumnResolvedFilterInfo dimColEvaluatorInfo; protected DimColumnExecuterFilterInfo dimColumnExecuterInfo; + protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo; + protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo; protected SegmentProperties segmentProperties; + protected boolean isDimensionPresentInCurrentBlock = false; + protected boolean isMeasurePresentInCurrentBlock = false; /** * is dimension column data is natural sorted */ - private boolean isNaturalSorted; + private boolean isNaturalSorted = false; + public 
ExcludeFilterExecuterImpl(DimColumnResolvedFilterInfo dimColEvaluatorInfo, - SegmentProperties segmentProperties) { - this.dimColEvaluatorInfo = dimColEvaluatorInfo; - dimColumnExecuterInfo = new DimColumnExecuterFilterInfo(); + MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo, SegmentProperties segmentProperties, + boolean isMeasure) { this.segmentProperties = segmentProperties; - FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties, - dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo); - isNaturalSorted = dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo - .getDimension().isSortColumn(); + if (isMeasure == false) { + this.dimColEvaluatorInfo = dimColEvaluatorInfo; + dimColumnExecuterInfo = new DimColumnExecuterFilterInfo(); + + FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(), segmentProperties, + dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo, null, null); + isDimensionPresentInCurrentBlock = true; + isNaturalSorted = + dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo + .getDimension().isSortColumn(); + } else { + this.msrColumnEvaluatorInfo = msrColumnEvaluatorInfo; + msrColumnExecutorInfo = new MeasureColumnExecuterFilterInfo(); + FilterUtil + .prepareKeysFromSurrogates(msrColumnEvaluatorInfo.getFilterValues(), segmentProperties, + null, null, msrColumnEvaluatorInfo.getMeasure(), msrColumnExecutorInfo); + isMeasurePresentInCurrentBlock = true; + } + } @Override public BitSetGroup applyFilter(BlocksChunkHolder blockChunkHolder) throws IOException { - int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() - .get(dimColEvaluatorInfo.getColumnIndex()); - if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { - blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() - .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + if 
(isDimensionPresentInCurrentBlock == true) { + int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping() + .get(dimColEvaluatorInfo.getColumnIndex()); + if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) { + blockChunkHolder.getDimensionRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex); + } + DimensionRawColumnChunk dimensionRawColumnChunk = + blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; + DimensionColumnDataChunk[] dimensionColumnDataChunks = + dimensionRawColumnChunk.convertToDimColDataChunks(); + BitSetGroup bitSetGroup = new BitSetGroup(dimensionRawColumnChunk.getPagesCount()); + for (int i = 0; i < dimensionColumnDataChunks.length; i++) { + BitSet bitSet = getFilteredIndexes(dimensionColumnDataChunks[i], + dimensionRawColumnChunk.getRowCount()[i]); + bitSetGroup.setBitSet(bitSet, i); + } + + return bitSetGroup; + } else if (isMeasurePresentInCurrentBlock == true) { + int blockIndex = segmentProperties.getMeasuresOrdinalToBlockMapping() + .get(msrColumnEvaluatorInfo.getColumnIndex()); + if (null == blockChunkHolder.getMeasureRawDataChunk()[blockIndex]) { + blockChunkHolder.getMeasureRawDataChunk()[blockIndex] = blockChunkHolder.getDataBlock() + .getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex); + } + MeasureRawColumnChunk measureRawColumnChunk = + blockChunkHolder.getMeasureRawDataChunk()[blockIndex]; + MeasureColumnDataChunk[] measureColumnDataChunks = + measureRawColumnChunk.convertToMeasureColDataChunks(); + BitSetGroup bitSetGroup = new BitSetGroup(measureRawColumnChunk.getPagesCount()); + DataType msrType = getMeasureDataType(msrColumnEvaluatorInfo); + for (int i = 0; i < measureColumnDataChunks.length; i++) { + BitSet bitSet = + getFilteredIndexes(measureColumnDataChunks[i], measureRawColumnChunk.getRowCount()[i], + msrType); + bitSetGroup.setBitSet(bitSet, i); + } + return bitSetGroup; } - DimensionRawColumnChunk 
dimensionRawColumnChunk = - blockChunkHolder.getDimensionRawDataChunk()[blockIndex]; - DimensionColumnDataChunk[] dimensionColumnDataChunks = - dimensionRawColumnChunk.convertToDimColDataChunks(); - BitSetGroup bitSetGroup = - new BitSetGroup(dimensionRawColumnChunk.getPagesCount()); - for (int i = 0; i < dimensionColumnDataChunks.length; i++) { - BitSet bitSet = getFilteredIndexes(dimensionColumnDataChunks[i], - dimensionRawColumnChunk.getRowCount()[i]); - bitSetGroup.setBitSet(bitSet, i); + return null; + } + + private DataType getMeasureDataType(MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo) { + switch (msrColumnEvaluatorInfo.getType()) { + case SHORT: + return DataType.SHORT; + case INT: + return DataType.INT; + case LONG: + return DataType.LONG; + case DECIMAL: + return DataType.DECIMAL; + default: + return DataType.DOUBLE; } + } - return bitSetGroup; + protected BitSet getFilteredIndexes(MeasureColumnDataChunk measureColumnDataChunk, + int numerOfRows, DataType msrType) { + // Here the algorithm is + // Get the measure values from the chunk. compare sequentially with the + // the filter values. The one that matches sets it Bitset. + BitSet bitSet = new BitSet(numerOfRows); + bitSet.flip(0, numerOfRows); + byte[][] filterValues = msrColumnExecutorInfo.getFilterKeys(); --- End diff -- As of now, filterKeys uses ByteArray; a later optimization will convert it to store objects instead.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---