Repository: carbondata Updated Branches: refs/heads/master 68b359e15 -> b0589e502
[CARBONDATA-2859][SDV] Add sdv test cases for bloomfilter datamap add sdv test cases for bloomfilter datamap This closes #2638 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/b0589e50 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/b0589e50 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/b0589e50 Branch: refs/heads/master Commit: b0589e502b447bcf3e9b8615e77500ea9b9600cc Parents: 68b359e Author: xuchuanyin <[email protected]> Authored: Wed Aug 15 15:01:21 2018 +0800 Committer: Zhang Zhichao <[email protected]> Committed: Sat Sep 8 21:16:24 2018 +0800 ---------------------------------------------------------------------- .../generated/BloomFilterDataMapTestCase.scala | 385 +++++++++++++++++++ 1 file changed, 385 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/b0589e50/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala ---------------------------------------------------------------------- diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala new file mode 100644 index 0000000..8acbcd6 --- /dev/null +++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/BloomFilterDataMapTestCase.scala @@ -0,0 +1,385 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.cluster.sdv.generated + +import org.apache.spark.sql.Row +import org.apache.spark.sql.test.util.QueryTest +import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} + +import org.apache.carbondata.core.constants.CarbonCommonConstants +import org.apache.carbondata.core.util.CarbonProperties + +class BloomFilterDataMapTestCase extends QueryTest with BeforeAndAfterEach with BeforeAndAfterAll { + + override protected def beforeAll(): Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, + "yyyy-MM-dd") + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, + "yyyy-MM-dd HH:mm:ss") + + } + + override protected def afterAll(): Unit = { + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_DATE_FORMAT, + CarbonCommonConstants.CARBON_DATE_DEFAULT_FORMAT) + CarbonProperties.getInstance().addProperty(CarbonCommonConstants.CARBON_TIMESTAMP_FORMAT, + CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT) + } + + private def createAllDataTypeTable(tableName: String): Unit = { + sql(s"DROP TABLE IF EXISTS $tableName") + sql( + s""" + | CREATE TABLE $tableName( + | booleanField boolean, + | shortField smallint, + | intField int, + | bigintField bigint, + | doubleField double, + | stringField string, + | timestampField timestamp, + | decimalField decimal(18,2), + | dateField date, + | charField string, + | floatField float, + | stringDictField string, + | stringSortField string, + | stringLocalDictField string, + | longStringField string + | ) + | STORED BY 'carbondata' + | TBLPROPERTIES( + | 'LONG_STRING_COLUMNS'='longStringField', + | 'SORT_COLUMNS'='stringSortField', + | 'DICTIONARY_INCLUDE'='stringDictField', + | 'local_dictionary_enable'='true', + | 'local_dictionary_threshold'='10000', + | 'local_dictionary_include'='stringLocalDictField', + | 'CACHE_LEVEL'='BLOCKLET') + """.stripMargin) + } + + private def loadAllDataTypeTable(tableName: String): Unit = { + sql( + s""" + | INSERT INTO TABLE $tableName VALUES + | (true,1,11,101,41.4,'string1','2015-04-23 12:01:01',12.34,'2015-04-23','aaa',1.5,'dict1','sort1','local_dict1','longstring1'), + | (false,2,12,102,42.4,'string2','2015-05-23 12:01:03',23.45,'2015-05-23','bbb',2.5,'dict2','sort2','local_dict2','longstring2'), + | (true,3,13,163,43.4,'string3','2015-07-26 12:01:06',34.56,'2015-07-26','ccc',3.5,'dict3','sort3','local_dict3','longstring3'), + | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) + """.stripMargin) + sql( + s""" + | INSERT INTO TABLE $tableName VALUES + | (true,${Short.MaxValue - 2},${Int.MinValue + 2},${Long.MaxValue - 2},${Double.MinValue + 2},'string1','2015-04-23 12:01:01',${Double.MinValue + 2},'2015-04-23','aaa',${Float.MaxValue - 2},'dict1','sort1','local_dict1','longstring1'), + | (false,2,12,102,42.4,'string2','2015-05-23 12:01:03',23.45,'2015-05-23','bbb',2.5,'dict2','sort2','local_dict2','longstring2'), + | (true,${Short.MinValue + 2},${Int.MaxValue - 2},${Long.MinValue + 2},${Double.MaxValue - 2},'string3','2015-07-26 12:01:06',${Double.MinValue + 2},'2015-07-26','ccc',${Float.MinValue + 2},'dict3','sort3','local_dict3','longstring3'), + | (NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) + """.stripMargin) + } + + test("create bloomfilter datamap on all datatypes") { + val tableName = "all_data_types" + val dataMapName = "dm_with_all_data_types" + createAllDataTypeTable(tableName) + loadAllDataTypeTable(tableName) + // create datamap on all supported datatype + sql( + s""" + | CREATE DATAMAP $dataMapName ON TABLE $tableName + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='booleanField, shortField, intField, bigintField, doubleField, stringField, timestampField, decimalField, dateField, charField, floatField, stringDictField, stringSortField, stringLocalDictField, longStringField', + | 'BLOOM_SIZE'='6400', + | 'BLOOM_FPP'='0.001', + | 'BLOOM_COMPRESS'='TRUE') + """.stripMargin) + loadAllDataTypeTable(tableName) + checkExistence(sql(s"SHOW DATAMAP ON TABLE $tableName"), true, "bloomfilter", dataMapName) + checkAnswer(sql(s"SELECT COUNT(*) FROM $tableName"), Seq(Row(16))) + checkExistence( + sql(s"EXPLAIN SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"), + true, "bloomfilter", dataMapName) + checkAnswer( + sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"), + Seq(Row(4))) + sql(s"DROP TABLE IF EXISTS $tableName") + } + + test("create bloomfilter datamap on all datatypes with sort columns") { + val tableName = "all_data_types_with_sort_column" + val dataMapName = "dm_with_all_data_types_with_sort_column" + sql(s"DROP TABLE IF EXISTS $tableName") + // double/float/decimal/longstring cannot be sort_columns so we ignore them in SORT_COLUMNS + sql( + s""" + | CREATE TABLE $tableName( + | booleanField boolean, + | shortField smallint, + | intField int, + | bigintField bigint, + | doubleField double, + | stringField string, + | timestampField timestamp, + | decimalField decimal(18,2), + | dateField date, + | charField string, + | floatField float, + | stringDictField string, + | stringSortField string, + | stringLocalDictField string, + | longStringField string + | ) + | STORED BY 'carbondata' + | TBLPROPERTIES( + | 'LONG_STRING_COLUMNS'='longStringField', + | 'DICTIONARY_INCLUDE'='stringDictField', + | 'local_dictionary_enable'='true', + | 'local_dictionary_threshold'='10000', + | 'local_dictionary_include'='stringLocalDictField', + | 'SORT_COLUMNS'='booleanField, shortField, intField, bigintField, stringField, timestampField, dateField, charField, stringDictField, stringSortField, stringLocalDictField') + """.stripMargin) + loadAllDataTypeTable(tableName) + // create datamap on all supported datatype + sql( + s""" + | CREATE DATAMAP $dataMapName ON TABLE $tableName + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='booleanField, shortField, intField, bigintField, doubleField, stringField, timestampField, decimalField, dateField, charField, floatField, stringDictField, stringSortField, stringLocalDictField, longStringField', + | 'BLOOM_SIZE'='6400', + | 'BLOOM_FPP'='0.001', + | 'BLOOM_COMPRESS'='TRUE') + """.stripMargin) + loadAllDataTypeTable(tableName) + checkExistence(sql(s"SHOW DATAMAP ON TABLE $tableName"), true, "bloomfilter", dataMapName) + checkAnswer(sql(s"SELECT COUNT(*) FROM $tableName"), Seq(Row(16))) + checkExistence( + sql(s"EXPLAIN SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"), + true, "bloomfilter", dataMapName) + checkAnswer( + sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"), + Seq(Row(4))) + sql(s"DROP TABLE IF EXISTS $tableName") + } + + test("test bloom datamap with empty values on index column") { + val normalTable = "normal_table" + val bloomDMSampleTable = "bloom_table" + val dataMapName = "bloom_datamap" + sql(s"DROP TABLE IF EXISTS $normalTable") + sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable") + sql(s"CREATE TABLE $normalTable(c1 string, c2 int, c3 string) STORED BY 'carbondata'") + sql(s"CREATE TABLE $bloomDMSampleTable(c1 string, c2 int, c3 string) STORED BY 'carbondata'") + // load data with empty value + sql(s"INSERT INTO $normalTable SELECT '', 1, 'xxx'") + sql(s"INSERT INTO $bloomDMSampleTable SELECT '', 1, 'xxx'") + sql(s"INSERT INTO $normalTable SELECT '', null, 'xxx'") + sql(s"INSERT INTO $bloomDMSampleTable SELECT '', null, 'xxx'") + + sql( + s""" + | CREATE DATAMAP $dataMapName on table $bloomDMSampleTable + | using 'bloomfilter' + | DMPROPERTIES('index_columns'='c1, c2') + """.stripMargin) + + // load data with empty value + sql(s"INSERT INTO $normalTable SELECT '', 1, 'xxx'") + sql(s"INSERT INTO $bloomDMSampleTable SELECT '', 1, 'xxx'") + sql(s"INSERT INTO $normalTable SELECT '', null, 'xxx'") + sql(s"INSERT INTO $bloomDMSampleTable SELECT '', null, 'xxx'") + + // query on null fields + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable"), + sql(s"SELECT * FROM $normalTable")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE c1 = null"), + sql(s"SELECT * FROM $normalTable WHERE c1 = null")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE c1 = ''"), + sql(s"SELECT * FROM $normalTable WHERE c1 = ''")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE isNull(c1)"), + sql(s"SELECT * FROM $normalTable WHERE isNull(c1)")) + checkAnswer(sql(s"SELECT * FROM $bloomDMSampleTable WHERE isNull(c2)"), + sql(s"SELECT * FROM $normalTable WHERE isNull(c2)")) + sql(s"DROP TABLE IF EXISTS $normalTable") + sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable") + } + + test("create multiple datamaps vs create on datamap on multiple columns") { + val tableName1 = "all_data_types10" + val tableName2 = "all_data_types20" + val dataMapName1 = "dm_with_all_data_types10" + val dataMapName2Prefix = "dm_with_all_data_types2" + sql(s"DROP TABLE IF EXISTS $tableName1") + sql(s"DROP TABLE IF EXISTS $tableName2") + createAllDataTypeTable(tableName1) + createAllDataTypeTable(tableName2) + loadAllDataTypeTable(tableName1) + loadAllDataTypeTable(tableName2) + // create one datamap on multiple index columns + sql( + s""" + | CREATE DATAMAP $dataMapName1 ON TABLE $tableName1 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='booleanField, shortField, intField, bigintField, doubleField, stringField, timestampField, decimalField, dateField, charField, floatField, stringDictField, stringSortField, stringLocalDictField, longStringField' + | ) + """.stripMargin) + // create multiple datamaps each on one index column + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}0 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='booleanField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}1 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='shortField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}2 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='intField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}3 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='bigintField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}4 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='doubleField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}5 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='stringField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}6 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='timestampField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}7 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='decimalField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}8 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='dateField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}9 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='charField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}10 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='floatField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}11 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='stringDictField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}12 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='stringSortField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}13 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='stringLocalDictField' + | ) + """.stripMargin) + sql( + s""" + | CREATE DATAMAP ${dataMapName2Prefix}14 ON TABLE $tableName2 + | USING 'bloomfilter' + | DMPROPERTIES( + | 'INDEX_COLUMNS'='longStringField' + | ) + """.stripMargin) + + loadAllDataTypeTable(tableName1) + loadAllDataTypeTable(tableName2) + assert(sql(s"SHOW DATAMAP ON TABLE $tableName1").collect().length == 1) + assert(sql(s"SHOW DATAMAP ON TABLE $tableName2").collect().length == 15) + checkAnswer(sql(s"SELECT COUNT(*) FROM $tableName1"), sql(s"SELECT COUNT(*) FROM $tableName2")) + checkExistence( + sql(s"EXPLAIN SELECT * FROM $tableName1 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"), + true, "bloomfilter", dataMapName1) + val allDataMapsOnTable2 = (0 to 14).map(p => s"$dataMapName2Prefix$p") + val existedString = (allDataMapsOnTable2 :+ "bloomfilter").toArray + checkExistence( + sql(s"EXPLAIN SELECT * FROM $tableName2 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND doubleField=42.4 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2'"), + true, existedString: _*) + + checkAnswer( + sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName1 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b"), + sql(s"SELECT COUNT(*) FROM (SELECT * FROM $tableName2 WHERE booleanField=false AND shortField=2 AND intField=12 AND bigintField=102 AND stringField='string2' AND timestampField='2015-05-23 12:01:03' AND decimalField=23.45 AND dateField='2015-05-23' AND charField='bbb' AND floatField=2.5 AND stringDictField='dict2' AND stringSortField='sort2' AND stringLocalDictField= 'local_dict2' AND longStringField='longstring2') b")) + sql(s"DROP TABLE IF EXISTS $tableName1") + sql(s"DROP TABLE IF EXISTS $tableName2") + } +}
