Repository: carbondata Updated Branches: refs/heads/master d0f88a154 -> b08ef0012
[CARBONDATA-2380][DataMap] Support visible/invisible datamap for performance tuning Support making datamap visible/invisible through session env. An invisible datamap is ignored only during query; the user can still see this datamap, and its data will be updated as normal. We can mark a datamap as invisible with 'set carbon.datamap.visible.dbName.tableName.dataMapName=false'; the setting is effective only in the current session. This closes #2211 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b08ef001 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b08ef001 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b08ef001 Branch: refs/heads/master Commit: b08ef0012078a594502050b287439b6389fa8a8b Parents: d0f88a1 Author: xuchuanyin <[email protected]> Authored: Mon Apr 23 17:30:47 2018 +0800 Committer: Jacky Li <[email protected]> Committed: Thu Apr 26 19:25:36 2018 +0800 ---------------------------------------------------------------------- .../core/constants/CarbonCommonConstants.java | 7 +++ .../carbondata/core/datamap/DataMapChooser.java | 8 +-- .../core/datamap/DataMapStoreManager.java | 51 ++++++++++++++++++-- .../carbondata/core/util/SessionParams.java | 11 +++++ .../testsuite/datamap/CGDataMapTestCase.scala | 34 +++++++++++++ .../execution/command/CarbonHiveCommands.scala | 8 +++ 6 files changed, 111 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b08ef001/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java index 4e324fb..54db6e8 100644 --- 
a/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java +++ b/core/src/main/java/org/apache/carbondata/core/constants/CarbonCommonConstants.java @@ -62,6 +62,13 @@ public final class CarbonCommonConstants { public static final String CARBON_INPUT_SEGMENTS = "carbon.input.segments."; /** + * key prefix for set command. 'carbon.datamap.visible.dbName.tableName.dmName = false' means + * that the query on 'dbName.table' will not use the datamap 'dmName' + */ + @InterfaceStability.Unstable + public static final String CARBON_DATAMAP_VISIBLE = "carbon.datamap.visible."; + + /** * Fetch and validate the segments. * Used for aggregate table load as segment validation is not required. */ http://git-wip-us.apache.org/repos/asf/carbondata/blob/b08ef001/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java index 2334cf7..c3da9c6 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapChooser.java @@ -75,12 +75,12 @@ public class DataMapChooser { Expression expression = resolverIntf.getFilterExpression(); // First check for FG datamaps if any exist List<TableDataMap> allDataMapFG = - DataMapStoreManager.getInstance().getAllDataMap(carbonTable, DataMapLevel.FG); + DataMapStoreManager.getInstance().getAllVisibleDataMap(carbonTable, DataMapLevel.FG); ExpressionTuple tuple = selectDataMap(expression, allDataMapFG, resolverIntf); if (tuple.dataMapExprWrapper == null) { // Check for CG datamap List<TableDataMap> allDataMapCG = - DataMapStoreManager.getInstance().getAllDataMap(carbonTable, DataMapLevel.CG); + DataMapStoreManager.getInstance().getAllVisibleDataMap(carbonTable, DataMapLevel.CG); tuple = 
selectDataMap(expression, allDataMapCG, resolverIntf); } if (tuple.dataMapExprWrapper != null) { @@ -102,7 +102,7 @@ public class DataMapChooser { Expression expression = resolverIntf.getFilterExpression(); // First check for FG datamaps if any exist List<TableDataMap> allDataMapFG = - DataMapStoreManager.getInstance().getAllDataMap(carbonTable, DataMapLevel.FG); + DataMapStoreManager.getInstance().getAllVisibleDataMap(carbonTable, DataMapLevel.FG); ExpressionTuple tuple = selectDataMap(expression, allDataMapFG, resolverIntf); if (tuple.dataMapExprWrapper != null) { return tuple.dataMapExprWrapper; @@ -121,7 +121,7 @@ public class DataMapChooser { Expression expression = resolverIntf.getFilterExpression(); // Check for CG datamap List<TableDataMap> allDataMapCG = - DataMapStoreManager.getInstance().getAllDataMap(carbonTable, DataMapLevel.CG); + DataMapStoreManager.getInstance().getAllVisibleDataMap(carbonTable, DataMapLevel.CG); ExpressionTuple tuple = selectDataMap(expression, allDataMapCG, resolverIntf); if (tuple.dataMapExprWrapper != null) { return tuple.dataMapExprWrapper; http://git-wip-us.apache.org/repos/asf/carbondata/blob/b08ef001/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java index f1c0321..0ea601b 100644 --- a/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java +++ b/core/src/main/java/org/apache/carbondata/core/datamap/DataMapStoreManager.java @@ -19,6 +19,7 @@ package org.apache.carbondata.core.datamap; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -29,6 +30,7 @@ import 
org.apache.carbondata.common.exceptions.sql.MalformedDataMapCommandExcept import org.apache.carbondata.common.exceptions.sql.NoSuchDataMapException; import org.apache.carbondata.common.logging.LogService; import org.apache.carbondata.common.logging.LogServiceFactory; +import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.datamap.dev.DataMapFactory; import org.apache.carbondata.core.indexstore.BlockletDetailsFetcher; import org.apache.carbondata.core.indexstore.SegmentPropertiesFetcher; @@ -43,6 +45,8 @@ import org.apache.carbondata.core.mutate.SegmentUpdateDetails; import org.apache.carbondata.core.mutate.UpdateVO; import org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager; import org.apache.carbondata.core.util.CarbonProperties; +import org.apache.carbondata.core.util.CarbonSessionInfo; +import org.apache.carbondata.core.util.ThreadLocalSessionInfo; /** * It maintains all the DataMaps in it. @@ -75,13 +79,12 @@ public final class DataMapStoreManager { } /** - * It gives all datamaps of type @mapType except the default datamap. - * + * It gives all visible datamaps of type @mapType except the default datamap. 
*/ - public List<TableDataMap> getAllDataMap(CarbonTable carbonTable, DataMapLevel mapType) + public List<TableDataMap> getAllVisibleDataMap(CarbonTable carbonTable, DataMapLevel mapType) throws IOException { List<TableDataMap> dataMaps = new ArrayList<>(); - List<TableDataMap> tableIndices = getAllDataMap(carbonTable); + List<TableDataMap> tableIndices = getAllVisibleDataMap(carbonTable); if (tableIndices != null) { for (TableDataMap dataMap : tableIndices) { if (mapType == dataMap.getDataMapFactory().getDataMapType()) { @@ -93,6 +96,30 @@ public final class DataMapStoreManager { } /** + * It only gives the visible datamaps + */ + private List<TableDataMap> getAllVisibleDataMap(CarbonTable carbonTable) throws IOException { + CarbonSessionInfo sessionInfo = ThreadLocalSessionInfo.getCarbonSessionInfo(); + List<TableDataMap> allDataMaps = getAllDataMap(carbonTable); + Iterator<TableDataMap> dataMapIterator = allDataMaps.iterator(); + while (dataMapIterator.hasNext()) { + TableDataMap dataMap = dataMapIterator.next(); + String dbName = carbonTable.getDatabaseName(); + String tableName = carbonTable.getTableName(); + String dmName = dataMap.getDataMapSchema().getDataMapName(); + boolean isDmVisible = sessionInfo.getSessionParams().getProperty( + String.format("%s%s.%s.%s", CarbonCommonConstants.CARBON_DATAMAP_VISIBLE, + dbName, tableName, dmName), "true").trim().equalsIgnoreCase("true"); + if (!isDmVisible) { + LOGGER.warn(String.format("Ignore invisible datamap %s on table %s.%s", + dmName, dbName, tableName)); + dataMapIterator.remove(); + } + } + return allDataMaps; + } + + /** * It gives all datamaps except the default datamap. 
* * @return @@ -356,6 +383,22 @@ public final class DataMapStoreManager { } /** + * is datamap exist + * @return true if exist, else return false + */ + public boolean isDataMapExist(String dbName, String tableName, String dmName) { + List<TableDataMap> tableDataMaps = allDataMaps.get(dbName + '_' + tableName); + if (tableDataMaps != null) { + for (TableDataMap dm : tableDataMaps) { + if (dm != null && dmName.equalsIgnoreCase(dm.getDataMapSchema().getDataMapName())) { + return true; + } + } + } + return false; + } + + /** * Get the blocklet datamap factory to get the detail information of blocklets * * @param table http://git-wip-us.apache.org/repos/asf/carbondata/blob/b08ef001/core/src/main/java/org/apache/carbondata/core/util/SessionParams.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/carbondata/core/util/SessionParams.java b/core/src/main/java/org/apache/carbondata/core/util/SessionParams.java index 58dc218..8544da9 100644 --- a/core/src/main/java/org/apache/carbondata/core/util/SessionParams.java +++ b/core/src/main/java/org/apache/carbondata/core/util/SessionParams.java @@ -28,6 +28,7 @@ import org.apache.carbondata.core.cache.CacheProvider; import org.apache.carbondata.core.constants.CarbonCommonConstants; import org.apache.carbondata.core.constants.CarbonCommonConstantsInternal; import org.apache.carbondata.core.constants.CarbonLoadOptionConstants; +import org.apache.carbondata.core.datamap.DataMapStoreManager; import org.apache.carbondata.core.exception.InvalidConfigurationException; import static org.apache.carbondata.core.constants.CarbonCommonConstants.CARBON_CUSTOM_BLOCK_DISTRIBUTION; @@ -208,6 +209,16 @@ public class SessionParams implements Serializable, Cloneable { isValid = true; } else if (key.startsWith(CarbonCommonConstantsInternal.QUERY_ON_PRE_AGG_STREAMING)) { isValid = true; + } else if (key.startsWith(CarbonCommonConstants.CARBON_DATAMAP_VISIBLE)) { + String[] keyArray = 
key.split("\\."); + isValid = DataMapStoreManager.getInstance().isDataMapExist( + keyArray[keyArray.length - 3], + keyArray[keyArray.length - 2], + keyArray[keyArray.length - 1]); + if (!isValid) { + throw new InvalidConfigurationException( + String.format("Invalid configuration of %s, datamap does not exist", key)); + } } else { throw new InvalidConfigurationException( "The key " + key + " not supported for dynamic configuration."); http://git-wip-us.apache.org/repos/asf/carbondata/blob/b08ef001/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/CGDataMapTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/CGDataMapTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/CGDataMapTestCase.scala index e428e24..f13ffad 100644 --- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/CGDataMapTestCase.scala +++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/datamap/CGDataMapTestCase.scala @@ -25,6 +25,7 @@ import com.sun.xml.internal.messaging.saaj.util.ByteOutputStream import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll +import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datamap.{DataMapDistributable, DataMapMeta, Segment} import org.apache.carbondata.core.datamap.dev.{DataMapModel, DataMapWriter} import org.apache.carbondata.core.datamap.dev.cgdatamap.{CoarseGrainDataMap, CoarseGrainDataMapFactory} @@ -383,6 +384,39 @@ class CGDataMapTestCase extends QueryTest with BeforeAndAfterAll { sql("select * from normal_test where name='n502670' and city='c2670'")) } + test("test invisible datamap during query") { + val tableName = "datamap_test" + val dataMapName1 = "datamap1" + val 
dataMapName2 = "datamap2" + sql(s"DROP TABLE IF EXISTS $tableName") + sql( + s""" + | CREATE TABLE $tableName(id INT, name STRING, city STRING, age INT) + | STORED BY 'org.apache.carbondata.format' + | TBLPROPERTIES('SORT_COLUMNS'='city,name', 'SORT_SCOPE'='LOCAL_SORT') + """.stripMargin) + // register datamap writer + sql(s"create datamap $dataMapName1 on table $tableName using '${classOf[CGDataMapFactory].getName}' DMPROPERTIES('indexcolumns'='name')") + sql(s"create datamap $dataMapName2 on table $tableName using '${classOf[CGDataMapFactory].getName}' DMPROPERTIES('indexcolumns'='city')") + sql(s"LOAD DATA LOCAL INPATH '$file2' INTO TABLE $tableName OPTIONS('header'='false')") + + // make datamap1 invisible + sql(s"set ${CarbonCommonConstants.CARBON_DATAMAP_VISIBLE}default.$tableName.$dataMapName1 = false") + checkAnswer(sql(s"select * from $tableName where name='n502670' and city='c2670'"), + sql("select * from normal_test where name='n502670' and city='c2670'")) + + // also make datamap2 invisible + sql(s"set ${CarbonCommonConstants.CARBON_DATAMAP_VISIBLE}default.$tableName.$dataMapName2 = false") + checkAnswer(sql(s"select * from $tableName where name='n502670' and city='c2670'"), + sql("select * from normal_test where name='n502670' and city='c2670'")) + + // make datamap1,datamap2 visible + sql(s"set ${CarbonCommonConstants.CARBON_DATAMAP_VISIBLE}default.$tableName.$dataMapName1 = true") + sql(s"set ${CarbonCommonConstants.CARBON_DATAMAP_VISIBLE}default.$tableName.$dataMapName1 = true") + checkAnswer(sql(s"select * from $tableName where name='n502670' and city='c2670'"), + sql("select * from normal_test where name='n502670' and city='c2670'")) + } + test("test datamap storage in system folder") { sql("DROP TABLE IF EXISTS datamap_store_test") sql( http://git-wip-us.apache.org/repos/asf/carbondata/blob/b08ef001/integration/spark2/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala 
---------------------------------------------------------------------- diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala index 10230a3..cce23dc 100644 --- a/integration/spark2/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala +++ b/integration/spark2/src/main/scala/org/apache/spark/sql/hive/execution/command/CarbonHiveCommands.scala @@ -107,6 +107,14 @@ object CarbonSetCommand { sessionParams.addProperty(key.toLowerCase(), value) } else if (key.startsWith(CarbonCommonConstantsInternal.QUERY_ON_PRE_AGG_STREAMING)) { sessionParams.addProperty(key.toLowerCase(), value) + } else if (key.startsWith(CarbonCommonConstants.CARBON_DATAMAP_VISIBLE)) { + if (key.split("\\.").length == 6) { + sessionParams.addProperty(key.toLowerCase, value) + } else { + throw new MalformedCarbonCommandException("property should be in " + + "\" carbon.datamap.visible.<database_name>.<table_name>.<database_name>" + + " = <true/false> \" format") + } } }
