[CARBONDATA-2799][BloomDataMap] Fix bugs in querying with bloom datamap on 
preagg with dictionary column

For preaggregate table, if the groupby column is dictionary column in
parent table, the preaggregate table will inherit the dictionary
encoding as well as the dictionary file from the parent table.

So for dictionary columns, during query with bloom, we need to
convert the plain filter value to dictionarty encoded value based on
parent table's dictionary file.

This closes #2580


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/bd6abbbf
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/bd6abbbf
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/bd6abbbf

Branch: refs/heads/external-format
Commit: bd6abbbffd36b5ca0aaad9d937d401982d1d60eb
Parents: b65bf9b
Author: xuchuanyin <xuchuan...@hust.edu.cn>
Authored: Mon Jul 30 17:50:51 2018 +0800
Committer: kunal642 <kunalkapoor...@gmail.com>
Committed: Thu Aug 2 16:55:59 2018 +0530

----------------------------------------------------------------------
 .../datamap/bloom/BloomCoarseGrainDataMap.java  | 21 ++++-
 .../BloomCoarseGrainDataMapFunctionSuite.scala  | 97 ++++++++++++++++++++
 2 files changed, 117 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/bd6abbbf/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
----------------------------------------------------------------------
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
index be531d6..71b1c55 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMap.java
@@ -47,10 +47,12 @@ import 
org.apache.carbondata.core.devapi.DictionaryGenerationException;
 import org.apache.carbondata.core.indexstore.Blocklet;
 import org.apache.carbondata.core.indexstore.PartitionSpec;
 import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier;
+import org.apache.carbondata.core.metadata.CarbonMetadata;
 import org.apache.carbondata.core.metadata.datatype.DataType;
 import org.apache.carbondata.core.metadata.datatype.DataTypes;
 import org.apache.carbondata.core.metadata.encoder.Encoding;
 import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
+import org.apache.carbondata.core.metadata.schema.table.RelationIdentifier;
 import org.apache.carbondata.core.metadata.schema.table.column.CarbonColumn;
 import org.apache.carbondata.core.scan.expression.ColumnExpression;
 import org.apache.carbondata.core.scan.expression.Expression;
@@ -108,6 +110,7 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
     for (CarbonColumn col : indexedColumn) {
       this.name2Col.put(col.getColName(), col);
     }
+    String parentTablePath = getAncestorTablePath(carbonTable);
 
     try {
       this.name2Converters = new HashMap<>(indexedColumn.size());
@@ -129,7 +132,7 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
         dataField.setTimestampFormat(tsFormat);
         FieldConverter fieldConverter = FieldEncoderFactory.getInstance()
             .createFieldEncoder(dataField, absoluteTableIdentifier, i, 
nullFormat, null, false,
-                localCaches[i], false, carbonTable.getTablePath());
+                localCaches[i], false, parentTablePath);
         this.name2Converters.put(indexedColumn.get(i).getColName(), 
fieldConverter);
       }
     } catch (IOException e) {
@@ -140,6 +143,22 @@ public class BloomCoarseGrainDataMap extends 
CoarseGrainDataMap {
     this.badRecordLogHolder.setLogged(false);
   }
 
+  /**
+   * recursively find the ancestor's table path. This is used for dictionary 
scenario
+   * where preagg will use the dictionary of the parent table.
+   */
+  private String getAncestorTablePath(CarbonTable currentTable) {
+    if (!currentTable.isChildDataMap()) {
+      return currentTable.getTablePath();
+    }
+
+    RelationIdentifier parentIdentifier =
+        currentTable.getTableInfo().getParentRelationIdentifiers().get(0);
+    CarbonTable parentTable = CarbonMetadata.getInstance().getCarbonTable(
+        parentIdentifier.getDatabaseName(), parentIdentifier.getTableName());
+    return getAncestorTablePath(parentTable);
+  }
+
   @Override
   public List<Blocklet> prune(FilterResolverIntf filterExp, SegmentProperties 
segmentProperties,
       List<PartitionSpec> partitions) throws IOException {

http://git-wip-us.apache.org/repos/asf/carbondata/blob/bd6abbbf/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFunctionSuite.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFunctionSuite.scala
 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFunctionSuite.scala
index 496a506..fd1345c 100644
--- 
a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFunctionSuite.scala
+++ 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFunctionSuite.scala
@@ -832,6 +832,103 @@ class BloomCoarseGrainDataMapFunctionSuite  extends 
QueryTest with BeforeAndAfte
       CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL)
   }
 
+  /**
+   * create bloom and preagg on base table, then create bloom on preagg table,
+   * index column and group by column is dictionary column.
+   * note that the test steps are copied from issue.
+   * In the CI env, sometime it will become timeout, so we ignore the newly 
added tests
+   */
+  ignore("test bloom datamap: CARBONDATA-2799 bloom datamap on preaggregate") {
+    sql(
+      s"""
+         | CREATE TABLE $normalTable (id int, name string, salary float,dob 
date)
+         | STORED BY 'carbondata'
+         | TBLPROPERTIES('dictionary_include'='id')
+       """.stripMargin)
+    sql(
+      s"""
+         | CREATE TABLE $bloomDMSampleTable (id int, name string, salary 
float,dob date)
+         | STORED BY 'carbondata'
+         | TBLPROPERTIES('dictionary_include'='id')
+       """.stripMargin)
+    (1 to 2).foreach { _ =>
+      sql(
+        s"""
+           | INSERT INTO $bloomDMSampleTable VALUES
+           | ('1', 'name1', '11.1', '2018-07-01'),
+           | ('2', 'name2', '21.1', '2018-07-02'),
+           | ('3', 'name3', '31.1', '2018-07-03'),
+           | ('4', 'name4', '41.1', '2018-07-04')
+       """.stripMargin)
+      sql(
+        s"""
+           | INSERT INTO $normalTable VALUES
+           | ('1', 'name1', '11.1', '2018-07-01'),
+           | ('2', 'name2', '21.1', '2018-07-02'),
+           | ('3', 'name3', '31.1', '2018-07-03'),
+           | ('4', 'name4', '41.1', '2018-07-04')
+       """.stripMargin)
+    }
+    sql(
+      s"""
+         | CREATE DATAMAP $dataMapName ON TABLE $bloomDMSampleTable
+         | USING 'bloomfilter'
+         | DMPROPERTIES('INDEX_COLUMNS'='id', 'BLOOM_SIZE'='320000', 
'BLOOM_FPP'='0.01', 'BLOOM_COMPRESS'='TRUE')
+       """.stripMargin)
+    sql(
+      s"""
+         | INSERT INTO $bloomDMSampleTable VALUES
+         | ('1', 'name1', '11.1', '2018-07-01'),
+         | ('2', 'name2', '21.1', '2018-07-02'),
+         | ('3', 'name3', '31.1', '2018-07-03'),
+         | ('4', 'name4', '41.1', '2018-07-04')
+       """.stripMargin)
+    sql(
+      s"""
+         | INSERT INTO $normalTable VALUES
+         | ('1', 'name1', '11.1', '2018-07-01'),
+         | ('2', 'name2', '21.1', '2018-07-02'),
+         | ('3', 'name3', '31.1', '2018-07-03'),
+         | ('4', 'name4', '41.1', '2018-07-04')
+       """.stripMargin)
+    val preAggOnBase = "preagg_on_base"
+    sql(
+      s"""
+         | CREATE DATAMAP $preAggOnBase ON TABLE $bloomDMSampleTable
+         | USING 'preaggregate' AS
+         | select id, count(id) from $bloomDMSampleTable group by id
+       """.stripMargin)
+    checkAnswer(sql(s"SELECT id, count(id) from $bloomDMSampleTable where id = 
3 group by id"),
+      sql(s"SELECT id, count(id) from $normalTable where id = 3 group by id"))
+
+    val bloomOnPreAgg = "bloom_on_pre_agg"
+    sql(
+      s"""
+         | CREATE DATAMAP $bloomOnPreAgg ON TABLE 
${bloomDMSampleTable}_${preAggOnBase}
+         | USING 'bloomfilter'
+         | DMPROPERTIES('INDEX_COLUMNS'='${bloomDMSampleTable}_id')
+       """.stripMargin)
+    checkAnswer(sql(s"SELECT id, count(id) from $bloomDMSampleTable where id = 
3 group by id"),
+      sql(s"SELECT id, count(id) from $normalTable where id = 3 group by id"))
+
+    sql(s"DROP DATAMAP $bloomOnPreAgg on table 
${bloomDMSampleTable}_${preAggOnBase}")
+    checkAnswer(sql(s"SELECT id, count(id) from $bloomDMSampleTable where id = 
3 group by id"),
+      sql(s"SELECT id, count(id) from $normalTable where id = 3 group by id"))
+
+    sql(
+      s"""
+         | CREATE DATAMAP $bloomOnPreAgg ON TABLE 
${bloomDMSampleTable}_${preAggOnBase}
+         | USING 'bloomfilter'
+         | DMPROPERTIES('INDEX_COLUMNS'='${bloomDMSampleTable}_id')
+       """.stripMargin)
+    checkAnswer(sql(s"SELECT id, count(id) from $bloomDMSampleTable where id = 
3 group by id"),
+      sql(s"SELECT id, count(id) from $normalTable where id = 3 group by id"))
+
+    sql(s"DROP DATAMAP $bloomOnPreAgg on table 
${bloomDMSampleTable}_${preAggOnBase}")
+    checkAnswer(sql(s"SELECT id, count(id) from $bloomDMSampleTable where id = 
3 group by id"),
+      sql(s"SELECT id, count(id) from $normalTable where id = 3 group by id"))
+  }
+
   override def afterAll(): Unit = {
     deleteFile(bigFile)
     sql(s"DROP TABLE IF EXISTS $normalTable")

Reply via email to