[CARBONDATA-2655][BloomDataMap] BloomFilter datamap supports the IN operator

Queries with an `IN` expression on a bloom index column can now leverage the
BloomFilter datamap.

This closes #2445


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/202d099d
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/202d099d
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/202d099d

Branch: refs/heads/carbonstore
Commit: 202d099d631571a96acdf781749561ad3f0da36a
Parents: f911403
Author: xuchuanyin <xuchuan...@hust.edu.cn>
Authored: Sat Jul 7 22:19:53 2018 +0800
Committer: Jacky Li <jacky.li...@qq.com>
Committed: Thu Jul 12 16:38:51 2018 +0800

----------------------------------------------------------------------
 .../datamap/bloom/BloomCoarseGrainDataMap.java  | 48 ++++++++++++++++++--
 .../bloom/BloomCoarseGrainDataMapFactory.java   |  1 +
 .../bloom/BloomCoarseGrainDataMapSuite.scala    | 36 +++++++++++++++
 3 files changed, 82 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/202d099d/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
----------------------------------------------------------------------
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index 01bd804..96f3495 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -55,6 +55,8 @@ import 
org.apache.carbondata.core.scan.expression.ColumnExpression;
 import org.apache.carbondata.core.scan.expression.Expression;
 import org.apache.carbondata.core.scan.expression.LiteralExpression;
 import 
org.apache.carbondata.core.scan.expression.conditional.EqualToExpression;
+import org.apache.carbondata.core.scan.expression.conditional.InExpression;
+import org.apache.carbondata.core.scan.expression.conditional.ListExpression;
 import org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf;
 import org.apache.carbondata.core.util.CarbonProperties;
 import org.apache.carbondata.core.util.CarbonUtil;
@@ -178,6 +180,7 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
   private List<BloomQueryModel> createQueryModel(Expression expression)
       throws DictionaryGenerationException, UnsupportedEncodingException {
     List<BloomQueryModel> queryModels = new ArrayList<BloomQueryModel>();
+    // the BloomFilter datamap only supports the EqualTo and In operators for now
     if (expression instanceof EqualToExpression) {
       Expression left = ((EqualToExpression) expression).getLeft();
       Expression right = ((EqualToExpression) expression).getRight();
@@ -186,7 +189,7 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
         column = ((ColumnExpression) left).getColumnName();
         if (this.name2Col.containsKey(column)) {
           BloomQueryModel bloomQueryModel =
-              buildQueryModelFromExpression((ColumnExpression) left, 
(LiteralExpression) right);
+              buildQueryModelForEqual((ColumnExpression) left, 
(LiteralExpression) right);
           queryModels.add(bloomQueryModel);
         }
         return queryModels;
@@ -194,10 +197,35 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
         column = ((ColumnExpression) right).getColumnName();
         if (this.name2Col.containsKey(column)) {
           BloomQueryModel bloomQueryModel =
-              buildQueryModelFromExpression((ColumnExpression) right, 
(LiteralExpression) left);
+              buildQueryModelForEqual((ColumnExpression) right, 
(LiteralExpression) left);
           queryModels.add(bloomQueryModel);
         }
         return queryModels;
+      } else {
+        LOGGER.warn("BloomFilter can only support the 'equal' filter like 'Col 
= PlainValue'");
+      }
+    } else if (expression instanceof InExpression) {
+      Expression left = ((InExpression) expression).getLeft();
+      Expression right = ((InExpression) expression).getRight();
+      String column;
+      if (left instanceof ColumnExpression && right instanceof ListExpression) 
{
+        column = ((ColumnExpression) left).getColumnName();
+        if (this.name2Col.containsKey(column)) {
+          List<BloomQueryModel> models =
+              buildQueryModelForIn((ColumnExpression) left, (ListExpression) 
right);
+          queryModels.addAll(models);
+        }
+        return queryModels;
+      } else if (left instanceof ListExpression && right instanceof 
ColumnExpression) {
+        column = ((ColumnExpression) right).getColumnName();
+        if (this.name2Col.containsKey(column)) {
+          List<BloomQueryModel> models =
+              buildQueryModelForIn((ColumnExpression) right, (ListExpression) 
left);
+          queryModels.addAll(models);
+        }
+        return queryModels;
+      } else {
+        LOGGER.warn("BloomFilter can only support the 'in' filter like 'Col in 
(PlainValues)'");
       }
     }
 
@@ -207,7 +235,7 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
     return queryModels;
   }
 
-  private BloomQueryModel buildQueryModelFromExpression(ColumnExpression ce,
+  private BloomQueryModel buildQueryModelForEqual(ColumnExpression ce,
       LiteralExpression le) throws DictionaryGenerationException, 
UnsupportedEncodingException {
     String columnName = ce.getColumnName();
     DataType dataType = ce.getDataType();
@@ -234,6 +262,20 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
     return buildQueryModelInternal(this.name2Col.get(columnName), 
literalValue, dataType);
   }
 
+  /**
+   * For `in` expressions, each value in the list is handled as an `equal` condition.
+   * Note that the `in` operator only needs at least one value to match, not all of them.
+   * Since pruning collects all the blocklets that match a query model, this is not a problem.
+   */
+  private List<BloomQueryModel> buildQueryModelForIn(ColumnExpression ce, 
ListExpression le)
+      throws DictionaryGenerationException, UnsupportedEncodingException {
+    List<BloomQueryModel> queryModels = new ArrayList<>();
+    for (Expression child : le.getChildren()) {
+      queryModels.add(buildQueryModelForEqual(ce, (LiteralExpression) child));
+    }
+    return queryModels;
+  }
+
   private BloomQueryModel buildQueryModelInternal(CarbonColumn carbonColumn,
       Object filterLiteralValue, DataType filterValueDataType) throws
       DictionaryGenerationException, UnsupportedEncodingException {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/202d099d/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
----------------------------------------------------------------------
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
index bf56043..68cf45c 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
@@ -112,6 +112,7 @@ public class BloomCoarseGrainDataMapFactory extends 
DataMapFactory<CoarseGrainDa
     List<ExpressionType> optimizedOperations = new ArrayList<ExpressionType>();
     // todo: support more optimize operations
     optimizedOperations.add(ExpressionType.EQUALS);
+    optimizedOperations.add(ExpressionType.IN);
     this.dataMapMeta = new DataMapMeta(this.dataMapName, indexedColumns, 
optimizedOperations);
     LOGGER.info(String.format("DataMap %s works for %s with bloom size %d",
         this.dataMapName, this.dataMapMeta, this.bloomFilterSize));

http://git-wip-us.apache.org/repos/asf/carbondata/blob/202d099d/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
index 14f8966..4dc1837 100644
--- 
a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
+++ 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
@@ -57,6 +57,9 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with 
BeforeAndAfterAll with
   }
 
   private def checkQuery(dataMapName: String, shouldHit: Boolean = true) = {
+    /**
+     * queries that use equal operator
+     */
     checkAnswer(
       checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id = 1", 
dataMapName, shouldHit),
       sql(s"select * from $normalTable where id = 1"))
@@ -85,6 +88,39 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with 
BeforeAndAfterAll with
     checkAnswer(
       checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city = 
'city_999' and name='n1'", dataMapName, shouldHit),
       sql(s"select * from $normalTable where city = 'city_999' and name='n1'"))
+
+    /**
+     * queries that use in operator
+     */
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id in (1)", 
dataMapName, shouldHit),
+      sql(s"select * from $normalTable where id in (1)"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id in 
(999)", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where id in (999)"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in( 
'city_1')", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city in( 'city_1')"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in 
('city_999')", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city in ('city_999')"))
+    // query with two index_columns
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id in (1) 
and city in ('city_1')", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where id in (1) and city in 
('city_1')"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where id in (999) 
and city in ('city_999')", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where id in (999) and city in 
('city_999')"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in 
('city_1') and id in (0)", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city in ('city_1') and id in 
(0)"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in 
('city_999') and name in ('n999')", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city in ('city_999') and name in 
('n999')"))
+    checkAnswer(
+      checkSqlHitDataMap(s"select * from $bloomDMSampleTable where city in 
('city_999') and name in ('n1')", dataMapName, shouldHit),
+      sql(s"select * from $normalTable where city in ('city_999') and name in 
('n1')"))
+
     checkAnswer(
       sql(s"select min(id), max(id), min(name), max(name), min(city), 
max(city)" +
           s" from $bloomDMSampleTable"),

Reply via email to