Repository: carbondata Updated Branches: refs/heads/master a16289786 -> 4612e0031
[CARBONDATA-2746][BloomDataMap] Fix bug for getting datamap file when table has multiple datamaps Currently, if a table has multiple bloom datamaps and carbon is set to use distributed datamap, a query will throw an exception when accessing the index file, because carbon gets all the datamaps but sets them with the same datamap schema. The error appears when getting the full path of the bloom index by concatenating the index directory and the index column. This PR fixes this problem by filtering for the index directories of the target datamap when using distributed datamap. Tests show that lucene is not affected by this problem. On the other hand, lucene gets wrong results if we apply this filter. This closes #2512 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/4612e003 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/4612e003 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/4612e003 Branch: refs/heads/master Commit: 4612e003186ccc6bae89443043bd0db3463f8fc1 Parents: a162897 Author: Manhua <kevin...@qq.com> Authored: Mon Jul 16 19:29:07 2018 +0800 Committer: xuchuanyin <xuchuan...@hust.edu.cn> Committed: Wed Jul 18 09:10:22 2018 +0800 ---------------------------------------------------------------------- .../bloom/BloomCoarseGrainDataMapFactory.java | 27 +++++++------ .../lucene/LuceneFineGrainDataMapSuite.scala | 7 ++++ .../bloom/BloomCoarseGrainDataMapSuite.scala | 40 ++++++++++++++++++++ 3 files changed, 62 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/4612e003/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java ---------------------------------------------------------------------- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java 
b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java index 35ebd20..4b5bc7c 100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java @@ -278,18 +278,21 @@ public class BloomCoarseGrainDataMapFactory extends DataMapFactory<CoarseGrainDa } if (dataMaps.size() > 0) { for (TableDataMap dataMap : dataMaps) { - List<CarbonFile> indexFiles; - String dmPath = CarbonTablePath - .getDataMapStorePath(tablePath, segmentId, dataMap.getDataMapSchema().getDataMapName()); - FileFactory.FileType fileType = FileFactory.getFileType(dmPath); - final CarbonFile dirPath = FileFactory.getCarbonFile(dmPath, fileType); - indexFiles = Arrays.asList(dirPath.listFiles(new CarbonFileFilter() { - @Override - public boolean accept(CarbonFile file) { - return file.isDirectory(); - } - })); - indexDirs.addAll(indexFiles); + // different from lucene, bloom only get corresponding directory of current datamap + if (dataMap.getDataMapSchema().getDataMapName().equals(this.dataMapName)) { + List<CarbonFile> indexFiles; + String dmPath = CarbonTablePath.getDataMapStorePath(tablePath, segmentId, + dataMap.getDataMapSchema().getDataMapName()); + FileFactory.FileType fileType = FileFactory.getFileType(dmPath); + final CarbonFile dirPath = FileFactory.getCarbonFile(dmPath, fileType); + indexFiles = Arrays.asList(dirPath.listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + return file.isDirectory(); + } + })); + indexDirs.addAll(indexFiles); + } } } return indexDirs.toArray(new CarbonFile[0]); http://git-wip-us.apache.org/repos/asf/carbondata/blob/4612e003/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala ---------------------------------------------------------------------- diff --git 
a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala index 657a3eb..aebbde4 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala @@ -34,6 +34,10 @@ import org.apache.carbondata.core.datamap.status.DataMapStatusManager class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll { + val originDistributedDatamapStatus = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP, + CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP_DEFAULT + ) val file2 = resourcesPath + "/datamap_input.csv" override protected def beforeAll(): Unit = { @@ -908,6 +912,9 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll { CarbonProperties.getInstance() .addProperty(CarbonCommonConstants.CARBON_LUCENE_INDEX_STOP_WORDS, CarbonCommonConstants.CARBON_LUCENE_INDEX_STOP_WORDS_DEFAULT) + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP, + originDistributedDatamapStatus) } } http://git-wip-us.apache.org/repos/asf/carbondata/blob/4612e003/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala index 0b0c665..12cd234 100644 --- a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala +++ 
b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala @@ -415,6 +415,46 @@ class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll with checkQuery("fakeDm", shouldHit = false) } + test("test create datamaps on different column but hit only one") { + val originDistributedDatamapStatus = CarbonProperties.getInstance().getProperty( + CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP, + CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP_DEFAULT + ) + + CarbonProperties.getInstance() + .addProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP, "true") + val datamap1 = "datamap1" + val datamap2 = "datamap2" + sql( + s""" + | CREATE TABLE $bloomDMSampleTable(id INT, name STRING, city STRING, age INT) + | STORED BY 'carbondata' + | """.stripMargin) + sql( + s""" + | CREATE DATAMAP $datamap1 ON TABLE $bloomDMSampleTable + | USING 'bloomfilter' + | DMProperties('INDEX_COLUMNS'='name', 'BLOOM_SIZE'='64000', 'BLOOM_FPP'='0.00001') + """.stripMargin) + sql( + s""" + | CREATE DATAMAP $datamap2 ON TABLE $bloomDMSampleTable + | USING 'bloomfilter' + | DMProperties('INDEX_COLUMNS'='city', 'BLOOM_SIZE'='64000', 'BLOOM_FPP'='0.00001') + """.stripMargin) + + sql( + s""" + | INSERT INTO $bloomDMSampleTable + | VALUES(5,'a','beijing',21),(6,'b','shanghai',25),(7,'b','guangzhou',28) + """.stripMargin) + assert(sql(s"SELECT * FROM $bloomDMSampleTable WHERE city='shanghai'").count() == 1) + + // recover original setting + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.USE_DISTRIBUTED_DATAMAP, + originDistributedDatamapStatus) + } + test("test block change datatype for bloomfilter index datamap") { sql( s"""