Repository: carbondata
Updated Branches:
  refs/heads/master a16289786 -> 4612e0031


[CARBONDATA-2746][BloomDataMap] Fix bug for getting datamap file when table has 
multiple datamaps

Currently, if a table has multiple bloom datamaps and carbon is set to use 
distributed datamap, a query will throw an exception when accessing the index 
file, because carbon gets all the datamaps but sets them with the same datamap 
schema. The error appears when getting the full path of the bloom index by 
concatenating the index directory and the index column. This PR fixes the 
problem by keeping only the index directories of the target datamap when using 
distributed datamap.

Tests show that lucene is not affected by this bug. On the other hand, lucene 
gets wrong results if we apply this filter to it.

This closes #2512


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/4612e003
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/4612e003
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/4612e003

Branch: refs/heads/master
Commit: 4612e003186ccc6bae89443043bd0db3463f8fc1
Parents: a162897
Author: Manhua <kevin...@qq.com>
Authored: Mon Jul 16 19:29:07 2018 +0800
Committer: xuchuanyin <xuchuan...@hust.edu.cn>
Committed: Wed Jul 18 09:10:22 2018 +0800

----------------------------------------------------------------------
 .../bloom/BloomCoarseGrainDataMapFactory.java   | 27 +++++++------
 .../lucene/LuceneFineGrainDataMapSuite.scala    |  7 ++++
 .../bloom/BloomCoarseGrainDataMapSuite.scala    | 40 ++++++++++++++++++++
 3 files changed, 62 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/4612e003/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
----------------------------------------------------------------------
diff --git 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
index 35ebd20..4b5bc7c 100644
--- 
a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
+++ 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java
@@ -278,18 +278,21 @@ public class BloomCoarseGrainDataMapFactory extends 
DataMapFactory<CoarseGrainDa
     }
     if (dataMaps.size() > 0) {
       for (TableDataMap dataMap : dataMaps) {
-        List<CarbonFile> indexFiles;
-        String dmPath = CarbonTablePath
-            .getDataMapStorePath(tablePath, segmentId, 
dataMap.getDataMapSchema().getDataMapName());
-        FileFactory.FileType fileType = FileFactory.getFileType(dmPath);
-        final CarbonFile dirPath = FileFactory.getCarbonFile(dmPath, fileType);
-        indexFiles = Arrays.asList(dirPath.listFiles(new CarbonFileFilter() {
-          @Override
-          public boolean accept(CarbonFile file) {
-            return file.isDirectory();
-          }
-        }));
-        indexDirs.addAll(indexFiles);
+        // different from lucene, bloom only get corresponding directory of 
current datamap
+        if 
(dataMap.getDataMapSchema().getDataMapName().equals(this.dataMapName)) {
+          List<CarbonFile> indexFiles;
+          String dmPath = CarbonTablePath.getDataMapStorePath(tablePath, 
segmentId,
+              dataMap.getDataMapSchema().getDataMapName());
+          FileFactory.FileType fileType = FileFactory.getFileType(dmPath);
+          final CarbonFile dirPath = FileFactory.getCarbonFile(dmPath, 
fileType);
+          indexFiles = Arrays.asList(dirPath.listFiles(new CarbonFileFilter() {
+            @Override
+            public boolean accept(CarbonFile file) {
+              return file.isDirectory();
+            }
+          }));
+          indexDirs.addAll(indexFiles);
+        }
       }
     }
     return indexDirs.toArray(new CarbonFile[0]);

http://git-wip-us.apache.org/repos/asf/carbondata/blob/4612e003/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
index 657a3eb..aebbde4 100644
--- 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
+++ 
b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala
@@ -34,6 +34,10 @@ import 
org.apache.carbondata.core.datamap.status.DataMapStatusManager
 
 class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll {
 
+  val originDistributedDatamapStatus = 
CarbonProperties.getInstance().getProperty(
+    CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP,
+    CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP_DEFAULT
+  )
   val file2 = resourcesPath + "/datamap_input.csv"
 
   override protected def beforeAll(): Unit = {
@@ -908,6 +912,9 @@ class LuceneFineGrainDataMapSuite extends QueryTest with 
BeforeAndAfterAll {
     CarbonProperties.getInstance()
       .addProperty(CarbonCommonConstants.CARBON_LUCENE_INDEX_STOP_WORDS,
         CarbonCommonConstants.CARBON_LUCENE_INDEX_STOP_WORDS_DEFAULT)
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP,
+        originDistributedDatamapStatus)
   }
 }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/4612e003/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
----------------------------------------------------------------------
diff --git 
a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
index 0b0c665..12cd234 100644
--- 
a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
+++ 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala
@@ -415,6 +415,46 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with 
BeforeAndAfterAll with
     checkQuery("fakeDm", shouldHit = false)
   }
 
+  test("test create datamaps on different column but hit only one") {
+    val originDistributedDatamapStatus = 
CarbonProperties.getInstance().getProperty(
+      CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP,
+      CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP_DEFAULT
+    )
+
+    CarbonProperties.getInstance()
+      .addProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP, "true")
+    val datamap1 = "datamap1"
+    val datamap2 = "datamap2"
+    sql(
+      s"""
+         | CREATE TABLE $bloomDMSampleTable(id INT, name STRING, city STRING, 
age INT)
+         | STORED BY 'carbondata'
+         |  """.stripMargin)
+    sql(
+      s"""
+         | CREATE DATAMAP $datamap1 ON TABLE $bloomDMSampleTable
+         | USING 'bloomfilter'
+         | DMProperties('INDEX_COLUMNS'='name', 'BLOOM_SIZE'='64000', 
'BLOOM_FPP'='0.00001')
+      """.stripMargin)
+    sql(
+      s"""
+         | CREATE DATAMAP $datamap2 ON TABLE $bloomDMSampleTable
+         | USING 'bloomfilter'
+         | DMProperties('INDEX_COLUMNS'='city', 'BLOOM_SIZE'='64000', 
'BLOOM_FPP'='0.00001')
+      """.stripMargin)
+
+    sql(
+      s"""
+         | INSERT INTO $bloomDMSampleTable
+         | 
VALUES(5,'a','beijing',21),(6,'b','shanghai',25),(7,'b','guangzhou',28)
+      """.stripMargin)
+    assert(sql(s"SELECT * FROM $bloomDMSampleTable WHERE 
city='shanghai'").count() == 1)
+
+    // recover original setting
+    
CarbonProperties.getInstance().addProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP,
+        originDistributedDatamapStatus)
+  }
+
   test("test block change datatype for bloomfilter index datamap") {
     sql(
       s"""

Reply via email to