[CARBONDATA-2655][BloomDataMap] BloomFilter datamap supports the `IN` operator. Queries with an `IN` expression on a bloom index column can now leverage the BloomFilter datamap.
This closes #2445 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/202d099d Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/202d099d Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/202d099d Branch: refs/heads/carbonstore Commit: 202d099d631571a96acdf781749561ad3f0da36a Parents: f911403 Author: xuchuanyin <xuchuan...@hust.edu.cn> Authored: Sat Jul 7 22:19:53 2018 +0800 Committer: Jacky Li <jacky.li...@qq.com> Committed: Thu Jul 12 16:38:51 2018 +0800 ---------------------------------------------------------------------- .../datamap/bloom/BloomCoarseGrainDataMap.java | 48 ++++++++++++++++++-- .../bloom/BloomCoarseGrainDataMapFactory.java | 1 + .../bloom/BloomCoarseGrainDataMapSuite.scala | 36 +++++++++++++++ 3 files changed, 82 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/202d099d/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java ---------------------------------------------------------------------- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java index 01bd804..96f3495 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java @@ -55,6 +55,8 @@ import org.apache.carbondata.core.scan.expression.ColumnExpression; import org.apache.carbondata.core.scan.expression.Expression; import org.apache.carbondata.core.scan.expression.LiteralExpression; import org.apache.carbondata.core.scan.expression.conditional.EqualToExpression; +import org.apache.carbondata.core.scan.expression.conditional.InExpression; +import 
org.apache.carbondata.core.scan.expression.conditional.ListExpression; import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf; import org.apache.carbondata.core.util.CarbonProperties; import org.apache.carbondata.core.util.CarbonUtil; @@ -178,6 +180,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { private List<BloomQueryModel> createQueryModel(Expression expression) throws DictionaryGenerationException, UnsupportedEncodingException { List<BloomQueryModel> queryModels = new ArrayList<BloomQueryModel>(); + // bloomdatamap only support equalTo and In operators now if (expression instanceof EqualToExpression) { Expression left = ((EqualToExpression) expression).getLeft(); Expression right = ((EqualToExpression) expression).getRight(); @@ -186,7 +189,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { column = ((ColumnExpression) left).getColumnName(); if (this.name2Col.containsKey(column)) { BloomQueryModel bloomQueryModel = - buildQueryModelFromExpression((ColumnExpression) left, (LiteralExpression) right); + buildQueryModelForEqual((ColumnExpression) left, (LiteralExpression) right); queryModels.add(bloomQueryModel); } return queryModels; @@ -194,10 +197,35 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { column = ((ColumnExpression) right).getColumnName(); if (this.name2Col.containsKey(column)) { BloomQueryModel bloomQueryModel = - buildQueryModelFromExpression((ColumnExpression) right, (LiteralExpression) left); + buildQueryModelForEqual((ColumnExpression) right, (LiteralExpression) left); queryModels.add(bloomQueryModel); } return queryModels; + } else { + LOGGER.warn("BloomFilter can only support the 'equal' filter like 'Col = PlainValue'"); + } + } else if (expression instanceof InExpression) { + Expression left = ((InExpression) expression).getLeft(); + Expression right = ((InExpression) expression).getRight(); + String column; + if (left instanceof ColumnExpression && right 
instanceof ListExpression) { + column = ((ColumnExpression) left).getColumnName(); + if (this.name2Col.containsKey(column)) { + List<BloomQueryModel> models = + buildQueryModelForIn((ColumnExpression) left, (ListExpression) right); + queryModels.addAll(models); + } + return queryModels; + } else if (left instanceof ListExpression && right instanceof ColumnExpression) { + column = ((ColumnExpression) right).getColumnName(); + if (this.name2Col.containsKey(column)) { + List<BloomQueryModel> models = + buildQueryModelForIn((ColumnExpression) right, (ListExpression) left); + queryModels.addAll(models); + } + return queryModels; + } else { + LOGGER.warn("BloomFilter can only support the 'in' filter like 'Col in (PlainValues)'"); } } @@ -207,7 +235,7 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { return queryModels; } - private BloomQueryModel buildQueryModelFromExpression(ColumnExpression ce, + private BloomQueryModel buildQueryModelForEqual(ColumnExpression ce, LiteralExpression le) throws DictionaryGenerationException, UnsupportedEncodingException { String columnName = ce.getColumnName(); DataType dataType = ce.getDataType(); @@ -234,6 +262,20 @@ public class BloomCoarseGrainDataMap extends CoarseGrainDataMap { return buildQueryModelInternal(this.name2Col.get(columnName), literalValue, dataType); } + /** + * for `in` expressions, we use `equal` to handle it. + * Note that `in` operator needs at least one match not exactly match. since while doing pruning, + * we collect all the blocklets that will match the querymodel, this will not be a problem. 
+ */ + private List<BloomQueryModel> buildQueryModelForIn(ColumnExpression ce, ListExpression le) + throws DictionaryGenerationException, UnsupportedEncodingException { + List<BloomQueryModel> queryModels = new ArrayList<>(); + for (Expression child : le.getChildren()) { + queryModels.add(buildQueryModelForEqual(ce, (LiteralExpression) child)); + } + return queryModels; + } + private BloomQueryModel buildQueryModelInternal(CarbonColumn carbonColumn, Object filterLiteralValue, DataType filterValueDataType) throws DictionaryGenerationException, UnsupportedEncodingException { http://git-wip-us.apache.org/repos/asf/carbondata/blob/202d099d/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java ---------------------------------------------------------------------- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java index bf56043..68cf45c 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java @@ -112,6 +112,7 @@ public class BloomCoarseGrainDataMapFactory extends DataMapFactory<CoarseGrainDa List<ExpressionType> optimizedOperations = new ArrayList<ExpressionType>(); // todo: support more optimize operations optimizedOperations.add(ExpressionType.EQUALS); + optimizedOperations.add(ExpressionType.IN); this.dataMapMeta = new DataMapMeta(this.dataMapName, indexedColumns, optimizedOperations); LOGGER.info(String.format("DataMap %s works for %s with bloom size %d", this.dataMapName, this.dataMapMeta, this.bloomFilterSize)); http://git-wip-us.apache.org/repos/asf/carbondata/blob/202d099d/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala 
---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala index 14f8966..4dc1837 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala +++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala @@ -57,6 +57,9 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with } private def checkQuery(dataMapName: String, shouldHit: Boolean = true) = { + /** + * queries that use equal operator + */ checkAnswer( checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id = 1", dataMapName, shouldHit), sql(s"select * from $normalTable where id = 1")) @@ -85,6 +88,39 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with checkAnswer( checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 'city_999' and name='n1'", dataMapName, shouldHit), sql(s"select * from $normalTable where city = 'city_999' and name='n1'")) + + /** + * queries that use in operator + */ + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id in (1)", dataMapName, shouldHit), + sql(s"select * from $normalTable where id in (1)")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id in (999)", dataMapName, shouldHit), + sql(s"select * from $normalTable where id in (999)")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in( 'city_1')", dataMapName, shouldHit), + sql(s"select * from $normalTable where city in( 'city_1')")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in ('city_999')", dataMapName, shouldHit), + sql(s"select * from $normalTable where city in 
('city_999')")) + // query with two index_columns + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id in (1) and city in ('city_1')", dataMapName, shouldHit), + sql(s"select * from $normalTable where id in (1) and city in ('city_1')")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id in (999) and city in ('city_999')", dataMapName, shouldHit), + sql(s"select * from $normalTable where id in (999) and city in ('city_999')")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in ('city_1') and id in (0)", dataMapName, shouldHit), + sql(s"select * from $normalTable where city in ('city_1') and id in (0)")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in ('city_999') and name in ('n999')", dataMapName, shouldHit), + sql(s"select * from $normalTable where city in ('city_999') and name in ('n999')")) + checkAnswer( + checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in ('city_999') and name in ('n1')", dataMapName, shouldHit), + sql(s"select * from $normalTable where city in ('city_999') and name in ('n1')")) + checkAnswer( sql(s"select min(id), max(id), min(name), max(name), min(city), max(city)" + s" from $bloomDMSampleTable"),