Github user ravipesala commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1079#discussion_r125153644
--- Diff:
core/src/main/java/org/apache/carbondata/core/scan/filter/executer/ExcludeFilterExecuterImpl.java
---
@@ -18,56 +18,152 @@
import java.io.IOException;
import java.util.BitSet;
+import java.util.Comparator;
import org.apache.carbondata.core.datastore.block.SegmentProperties;
import org.apache.carbondata.core.datastore.chunk.DimensionColumnDataChunk;
+import org.apache.carbondata.core.datastore.chunk.MeasureColumnDataChunk;
import
org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
+import
org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;
+import org.apache.carbondata.core.metadata.datatype.DataType;
import org.apache.carbondata.core.scan.filter.FilterUtil;
+import
org.apache.carbondata.core.scan.filter.partition.PartitionFilterUtil;
import
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.DimColumnResolvedFilterInfo;
+import
org.apache.carbondata.core.scan.filter.resolver.resolverinfo.MeasureColumnResolvedFilterInfo;
import org.apache.carbondata.core.scan.processor.BlocksChunkHolder;
import org.apache.carbondata.core.util.BitSetGroup;
import org.apache.carbondata.core.util.CarbonUtil;
+import org.apache.carbondata.core.util.DataTypeUtil;
public class ExcludeFilterExecuterImpl implements FilterExecuter {
protected DimColumnResolvedFilterInfo dimColEvaluatorInfo;
protected DimColumnExecuterFilterInfo dimColumnExecuterInfo;
+ protected MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo;
+ protected MeasureColumnExecuterFilterInfo msrColumnExecutorInfo;
protected SegmentProperties segmentProperties;
+ protected boolean isDimensionPresentInCurrentBlock = false;
+ protected boolean isMeasurePresentInCurrentBlock = false;
/**
* is dimension column data is natural sorted
*/
- private boolean isNaturalSorted;
+ private boolean isNaturalSorted = false;
+
public ExcludeFilterExecuterImpl(DimColumnResolvedFilterInfo
dimColEvaluatorInfo,
- SegmentProperties segmentProperties) {
- this.dimColEvaluatorInfo = dimColEvaluatorInfo;
- dimColumnExecuterInfo = new DimColumnExecuterFilterInfo();
+ MeasureColumnResolvedFilterInfo msrColumnEvaluatorInfo,
SegmentProperties segmentProperties,
+ boolean isMeasure) {
this.segmentProperties = segmentProperties;
-
FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(),
segmentProperties,
- dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo);
- isNaturalSorted =
dimColEvaluatorInfo.getDimension().isUseInvertedIndex() && dimColEvaluatorInfo
- .getDimension().isSortColumn();
+ if (isMeasure == false) {
+ this.dimColEvaluatorInfo = dimColEvaluatorInfo;
+ dimColumnExecuterInfo = new DimColumnExecuterFilterInfo();
+
+
FilterUtil.prepareKeysFromSurrogates(dimColEvaluatorInfo.getFilterValues(),
segmentProperties,
+ dimColEvaluatorInfo.getDimension(), dimColumnExecuterInfo, null,
null);
+ isDimensionPresentInCurrentBlock = true;
+ isNaturalSorted =
+ dimColEvaluatorInfo.getDimension().isUseInvertedIndex() &&
dimColEvaluatorInfo
+ .getDimension().isSortColumn();
+ } else {
+ this.msrColumnEvaluatorInfo = msrColumnEvaluatorInfo;
+ msrColumnExecutorInfo = new MeasureColumnExecuterFilterInfo();
+ FilterUtil
+
.prepareKeysFromSurrogates(msrColumnEvaluatorInfo.getFilterValues(),
segmentProperties,
+ null, null, msrColumnEvaluatorInfo.getMeasure(),
msrColumnExecutorInfo);
+ isMeasurePresentInCurrentBlock = true;
+ }
+
}
@Override public BitSetGroup applyFilter(BlocksChunkHolder
blockChunkHolder) throws IOException {
- int blockIndex = segmentProperties.getDimensionOrdinalToBlockMapping()
- .get(dimColEvaluatorInfo.getColumnIndex());
- if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex]) {
- blockChunkHolder.getDimensionRawDataChunk()[blockIndex] =
blockChunkHolder.getDataBlock()
- .getDimensionChunk(blockChunkHolder.getFileReader(), blockIndex);
+ if (isDimensionPresentInCurrentBlock == true) {
+ int blockIndex =
segmentProperties.getDimensionOrdinalToBlockMapping()
+ .get(dimColEvaluatorInfo.getColumnIndex());
+ if (null == blockChunkHolder.getDimensionRawDataChunk()[blockIndex])
{
+ blockChunkHolder.getDimensionRawDataChunk()[blockIndex] =
blockChunkHolder.getDataBlock()
+ .getDimensionChunk(blockChunkHolder.getFileReader(),
blockIndex);
+ }
+ DimensionRawColumnChunk dimensionRawColumnChunk =
+ blockChunkHolder.getDimensionRawDataChunk()[blockIndex];
+ DimensionColumnDataChunk[] dimensionColumnDataChunks =
+ dimensionRawColumnChunk.convertToDimColDataChunks();
+ BitSetGroup bitSetGroup = new
BitSetGroup(dimensionRawColumnChunk.getPagesCount());
+ for (int i = 0; i < dimensionColumnDataChunks.length; i++) {
+ BitSet bitSet = getFilteredIndexes(dimensionColumnDataChunks[i],
+ dimensionRawColumnChunk.getRowCount()[i]);
+ bitSetGroup.setBitSet(bitSet, i);
+ }
+
+ return bitSetGroup;
+ } else if (isMeasurePresentInCurrentBlock == true) {
+ int blockIndex = segmentProperties.getMeasuresOrdinalToBlockMapping()
+ .get(msrColumnEvaluatorInfo.getColumnIndex());
+ if (null == blockChunkHolder.getMeasureRawDataChunk()[blockIndex]) {
+ blockChunkHolder.getMeasureRawDataChunk()[blockIndex] =
blockChunkHolder.getDataBlock()
+ .getMeasureChunk(blockChunkHolder.getFileReader(), blockIndex);
+ }
+ MeasureRawColumnChunk measureRawColumnChunk =
+ blockChunkHolder.getMeasureRawDataChunk()[blockIndex];
+ MeasureColumnDataChunk[] measureColumnDataChunks =
+ measureRawColumnChunk.convertToMeasureColDataChunks();
+ BitSetGroup bitSetGroup = new
BitSetGroup(measureRawColumnChunk.getPagesCount());
+ DataType msrType = getMeasureDataType(msrColumnEvaluatorInfo);
+ for (int i = 0; i < measureColumnDataChunks.length; i++) {
+ BitSet bitSet =
+ getFilteredIndexes(measureColumnDataChunks[i],
measureRawColumnChunk.getRowCount()[i],
+ msrType);
+ bitSetGroup.setBitSet(bitSet, i);
+ }
+ return bitSetGroup;
}
- DimensionRawColumnChunk dimensionRawColumnChunk =
- blockChunkHolder.getDimensionRawDataChunk()[blockIndex];
- DimensionColumnDataChunk[] dimensionColumnDataChunks =
- dimensionRawColumnChunk.convertToDimColDataChunks();
- BitSetGroup bitSetGroup =
- new BitSetGroup(dimensionRawColumnChunk.getPagesCount());
- for (int i = 0; i < dimensionColumnDataChunks.length; i++) {
- BitSet bitSet = getFilteredIndexes(dimensionColumnDataChunks[i],
- dimensionRawColumnChunk.getRowCount()[i]);
- bitSetGroup.setBitSet(bitSet, i);
+ return null;
+ }
+
+ private DataType getMeasureDataType(MeasureColumnResolvedFilterInfo
msrColumnEvaluatorInfo) {
+ switch (msrColumnEvaluatorInfo.getType()) {
+ case SHORT:
+ return DataType.SHORT;
+ case INT:
+ return DataType.INT;
+ case LONG:
+ return DataType.LONG;
+ case DECIMAL:
+ return DataType.DECIMAL;
+ default:
+ return DataType.DOUBLE;
}
+ }
- return bitSetGroup;
+ protected BitSet getFilteredIndexes(MeasureColumnDataChunk
measureColumnDataChunk,
+ int numerOfRows, DataType msrType) {
+ // Here the algorithm is
+ // Get the measure values from the chunk. compare sequentially with the
+ // the filter values. The one that matches sets it Bitset.
+ BitSet bitSet = new BitSet(numerOfRows);
+ bitSet.flip(0, numerOfRows);
+ byte[][] filterValues = msrColumnExecutorInfo.getFilterKeys();
--- End diff --
It would be better to set objects on `msrColumnExecutorInfo` and use those
objects directly, without converting them.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---