Repository: carbondata
Updated Branches:
  refs/heads/master 4a37e05ca -> 46f0c8517
[CARBONDATA-2747][Lucene] Fix Lucene datamap choosing and DataMapDistributable building

1. choose lucene datamap for query column
2. build DataMapDistributable only for target datamap

This closes #2519

Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/46f0c851
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/46f0c851
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/46f0c851

Branch: refs/heads/master
Commit: 46f0c8517d4e79a402ff6dc8a077f3d3955f39b5
Parents: 4a37e05
Author: Manhua <[email protected]>
Authored: Wed Jul 18 10:14:40 2018 +0800
Committer: xuchuanyin <[email protected]>
Committed: Fri Jul 20 15:04:25 2018 +0800

----------------------------------------------------------------------
 .../carbondata/core/datamap/DataMapChooser.java | 14 +++++++----
 .../bloom/BloomCoarseGrainDataMapFactory.java | 1 -
 .../lucene/LuceneDataMapFactoryBase.java | 25 +++++++++++---------
 .../lucene/LuceneFineGrainDataMapSuite.scala | 9 +++++--
 4 files changed, 30 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/46f0c851/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java index cf5dffd..68696cf 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java @@ -34,6 +34,7 @@ import org.apache.carbondata.core.datamap.status.DataMapStatusManager; import org.apache.carbondata.core.metadata.schema.table.CarbonTable; import org.apache.carbondata.core.scan.expression.ColumnExpression; import 
org.apache.carbondata.core.scan.expression.Expression; +import org.apache.carbondata.core.scan.expression.MatchExpression; import org.apache.carbondata.core.scan.expression.logical.AndExpression; import org.apache.carbondata.core.scan.expression.logical.OrExpression; import org.apache.carbondata.core.scan.filter.intf.ExpressionType; @@ -269,6 +270,14 @@ public class DataMapChooser { List<ColumnExpression> columnExpressions) { if (expression instanceof ColumnExpression) { columnExpressions.add((ColumnExpression) expression); + } else if (expression instanceof MatchExpression) { + // this is a special case for lucene + // build a fake ColumnExpression to filter datamaps which contain target column + // a Lucene query string is alike "column:query term" + String[] queryItems = expression.getString().split(":", 2); + if (queryItems.length == 2) { + columnExpressions.add(new ColumnExpression(queryItems[0], null)); + } } else if (expression != null) { List<Expression> children = expression.getChildren(); if (children != null && children.size() > 0) { @@ -303,11 +312,6 @@ public class DataMapChooser { */ private boolean contains(DataMapMeta mapMeta, List<ColumnExpression> columnExpressions, Set<ExpressionType> expressionTypes) { - if (mapMeta.getOptimizedOperation().contains(ExpressionType.TEXT_MATCH) && - expressionTypes.contains(ExpressionType.TEXT_MATCH)) { - // TODO: fix it with right logic - return true; - } if (mapMeta.getIndexedColumns().size() == 0 || columnExpressions.size() == 0) { return false; } http://git-wip-us.apache.org/repos/asf/carbondata/blob/46f0c851/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java ---------------------------------------------------------------------- diff --git a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java index 4b5bc7c..652e1fc 
100644 --- a/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java +++ b/datamap/bloom/src/main/java/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapFactory.java @@ -278,7 +278,6 @@ public class BloomCoarseGrainDataMapFactory extends DataMapFactory<CoarseGrainDa } if (dataMaps.size() > 0) { for (TableDataMap dataMap : dataMaps) { - // different from lucene, bloom only get corresponding directory of current datamap if (dataMap.getDataMapSchema().getDataMapName().equals(this.dataMapName)) { List<CarbonFile> indexFiles; String dmPath = CarbonTablePath.getDataMapStorePath(tablePath, segmentId, http://git-wip-us.apache.org/repos/asf/carbondata/blob/46f0c851/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java ---------------------------------------------------------------------- diff --git a/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java b/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java index cd225f1..3179584 100644 --- a/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java +++ b/datamap/lucene/src/main/java/org/apache/carbondata/datamap/lucene/LuceneDataMapFactoryBase.java @@ -310,17 +310,20 @@ abstract class LuceneDataMapFactoryBase<T extends DataMap> extends DataMapFactor } if (dataMaps.size() > 0) { for (TableDataMap dataMap : dataMaps) { - List<CarbonFile> indexFiles; - String dmPath = CarbonTablePath - .getDataMapStorePath(tablePath, segmentId, dataMap.getDataMapSchema().getDataMapName()); - FileFactory.FileType fileType = FileFactory.getFileType(dmPath); - final CarbonFile dirPath = FileFactory.getCarbonFile(dmPath, fileType); - indexFiles = Arrays.asList(dirPath.listFiles(new CarbonFileFilter() { - @Override public boolean accept(CarbonFile file) { - return file.isDirectory(); - } - })); - indexDirs.addAll(indexFiles); + if 
(dataMap.getDataMapSchema().getDataMapName().equals(this.dataMapName)) { + List<CarbonFile> indexFiles; + String dmPath = CarbonTablePath.getDataMapStorePath(tablePath, segmentId, + dataMap.getDataMapSchema().getDataMapName()); + FileFactory.FileType fileType = FileFactory.getFileType(dmPath); + final CarbonFile dirPath = FileFactory.getCarbonFile(dmPath, fileType); + indexFiles = Arrays.asList(dirPath.listFiles(new CarbonFileFilter() { + @Override + public boolean accept(CarbonFile file) { + return file.isDirectory(); + } + })); + indexDirs.addAll(indexFiles); + } } } return indexDirs.toArray(new CarbonFile[0]); http://git-wip-us.apache.org/repos/asf/carbondata/blob/46f0c851/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala index aebbde4..b56701b 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/datamap/lucene/LuceneFineGrainDataMapSuite.scala @@ -666,13 +666,13 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll { """.stripMargin) sql( s""" - | CREATE DATAMAP dm2 ON TABLE datamap_test5 + | CREATE DATAMAP dm_city ON TABLE datamap_test5 | USING 'lucene' | DMProperties('INDEX_COLUMNS'='city') """.stripMargin) sql( s""" - | CREATE DATAMAP dm1 ON TABLE datamap_test5 + | CREATE DATAMAP dm_name ON TABLE datamap_test5 | USING 'lucene' | DMProperties('INDEX_COLUMNS'='Name') """.stripMargin) @@ -681,6 +681,11 @@ class LuceneFineGrainDataMapSuite extends QueryTest with BeforeAndAfterAll { sql(s"select * from datamap_test5 
where name='n10'")) checkAnswer(sql("SELECT * FROM datamap_test5 WHERE TEXT_MATCH('city:c020')"), sql(s"SELECT * FROM datamap_test5 WHERE city='c020'")) + + var explainString = sql("explain select * from datamap_test5 where TEXT_MATCH('name:n10')").collect() + assert(explainString(0).getString(0).contains( + "pruned by FG DataMap\n - name: dm_name\n - provider: lucene")) + sql("DROP TABLE IF EXISTS datamap_test5") }
