Repository: carbondata Updated Branches: refs/heads/master 21c5fb1db -> 0668e7d71
[HOTFIX] Remove carbon-spark2 dependency in carbon-bloom This closes #2229 Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/0668e7d7 Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/0668e7d7 Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/0668e7d7 Branch: refs/heads/master Commit: 0668e7d714bafc9939dd74a961e13b10bdab5494 Parents: 21c5fb1 Author: Jacky Li <[email protected]> Authored: Thu Apr 26 14:31:09 2018 +0800 Committer: QiangCai <[email protected]> Committed: Thu Apr 26 19:36:25 2018 +0800 ---------------------------------------------------------------------- datamap/bloom/pom.xml | 2 +- .../bloom/BloomCoarseGrainDataMapSuite.scala | 127 ------------------- integration/spark2/pom.xml | 6 + .../bloom/BloomCoarseGrainDataMapSuite.scala | 126 ++++++++++++++++++ 4 files changed, 133 insertions(+), 128 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/carbondata/blob/0668e7d7/datamap/bloom/pom.xml ---------------------------------------------------------------------- diff --git a/datamap/bloom/pom.xml b/datamap/bloom/pom.xml index 472df36..4ef8c0a 100644 --- a/datamap/bloom/pom.xml +++ b/datamap/bloom/pom.xml @@ -20,7 +20,7 @@ <dependencies> <dependency> <groupId>org.apache.carbondata</groupId> - <artifactId>carbondata-spark2</artifactId> + <artifactId>carbondata-core</artifactId> <version>${project.version}</version> </dependency> <dependency> http://git-wip-us.apache.org/repos/asf/carbondata/blob/0668e7d7/datamap/bloom/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala ---------------------------------------------------------------------- diff --git a/datamap/bloom/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/datamap/bloom/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala deleted file mode 100644 index 21283fe..0000000 --- a/datamap/bloom/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.datamap.bloom - -import java.io.{File, PrintWriter} -import java.util.UUID - -import scala.util.Random - -import org.apache.spark.sql.Row -import org.apache.spark.sql.test.util.QueryTest -import org.scalatest.BeforeAndAfterAll - -class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll { - val inputFile = s"$resourcesPath/bloom_datamap_input.csv" - val normalTable = "carbon_normal" - val bloomDMSampleTable = "carbon_bloom" - val dataMapName = "bloom_dm" - val lineNum = 500000 - - override protected def beforeAll(): Unit = { - createFile(inputFile, line = lineNum, start = 0) - sql(s"DROP TABLE IF EXISTS $normalTable") - sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable") - } - - test("test bloom datamap") { - sql( - s""" - | CREATE TABLE $normalTable(id INT, name STRING, city STRING, age INT, - | s1 STRING, s2 STRING, s3 STRING, s4 STRING, s5 STRING, s6 STRING, s7 STRING, s8 STRING) - | STORED BY 'carbondata' TBLPROPERTIES('table_blocksize'='128') - | """.stripMargin) - sql( - s""" - | CREATE TABLE $bloomDMSampleTable(id INT, name STRING, city STRING, age INT, - | s1 STRING, s2 STRING, s3 STRING, s4 STRING, s5 STRING, s6 STRING, s7 STRING, s8 STRING) - | STORED BY 'carbondata' TBLPROPERTIES('table_blocksize'='128') - | """.stripMargin) - sql( - s""" - | CREATE DATAMAP $dataMapName ON TABLE $bloomDMSampleTable - | USING '${classOf[BloomCoarseGrainDataMapFactory].getName}' - | DMProperties('BLOOM_COLUMNS'='city,id', 'BLOOM_SIZE'='640000') - """.stripMargin) - - sql( - s""" - | LOAD DATA LOCAL INPATH '$inputFile' INTO TABLE $normalTable - | OPTIONS('header'='false') - """.stripMargin) - sql( - s""" - | LOAD DATA LOCAL INPATH '$inputFile' INTO TABLE $bloomDMSampleTable - | OPTIONS('header'='false') - """.stripMargin) - - sql(s"show datamap on table $bloomDMSampleTable").show(false) - sql(s"select * from $bloomDMSampleTable where city = 'city_5'").show(false) - sql(s"select * from $bloomDMSampleTable limit 5").show(false) - - checkExistence(sql(s"show datamap on table $bloomDMSampleTable"), true, dataMapName) -// checkAnswer(sql(s"show datamap on table $bloomDMSampleTable"), -// Row(dataMapName, classOf[BloomCoarseGrainDataMapFactory].getName, "(NA)")) - checkAnswer(sql(s"select * from $bloomDMSampleTable where id = 1"), - sql(s"select * from $normalTable where id = 1")) - checkAnswer(sql(s"select * from $bloomDMSampleTable where id = 999"), - sql(s"select * from $normalTable where id = 999")) - checkAnswer(sql(s"select * from $bloomDMSampleTable where city = 'city_1'"), - sql(s"select * from $normalTable where city = 'city_1'")) - checkAnswer(sql(s"select * from $bloomDMSampleTable where city = 'city_999'"), - sql(s"select * from $normalTable where city = 'city_999'")) - checkAnswer(sql(s"select count(distinct id), count(distinct name), count(distinct city)," + - s" count(distinct s1), count(distinct s2) from $bloomDMSampleTable"), - sql(s"select count(distinct id), count(distinct name), count(distinct city)," + - s" count(distinct s1), count(distinct s2) from $normalTable")) - checkAnswer(sql(s"select min(id), max(id), min(name), max(name), min(city), max(city)" + - s" from $bloomDMSampleTable"), - sql(s"select min(id), max(id), min(name), max(name), min(city), max(city)" + - s" from $normalTable")) - } - - // todo: will add more tests on bloom datamap, such as exception, delete datamap, show profiler - - override protected def afterAll(): Unit = { - deleteFile(inputFile) - sql(s"DROP TABLE IF EXISTS $normalTable") - sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable") - } - - private def createFile(fileName: String, line: Int = 10000, start: Int = 0) = { - if (!new File(fileName).exists()) { - val write = new PrintWriter(new File(fileName)) - for (i <- start until (start + line)) { - write.println( - s"$i,n$i,city_$i,${ Random.nextInt(80) }," + - s"${ UUID.randomUUID().toString },${ UUID.randomUUID().toString }," + - s"${ UUID.randomUUID().toString },${ UUID.randomUUID().toString }," + - s"${ UUID.randomUUID().toString },${ UUID.randomUUID().toString }," + - s"${ UUID.randomUUID().toString },${ UUID.randomUUID().toString }") - } - write.close() - } - } - - private def deleteFile(fileName: String): Unit = { - val file = new File(fileName) - if (file.exists()) { - file.delete() - } - } -} http://git-wip-us.apache.org/repos/asf/carbondata/blob/0668e7d7/integration/spark2/pom.xml ---------------------------------------------------------------------- diff --git a/integration/spark2/pom.xml b/integration/spark2/pom.xml index 2372539..1afb08f 100644 --- a/integration/spark2/pom.xml +++ b/integration/spark2/pom.xml @@ -55,6 +55,12 @@ <version>${project.version}</version> </dependency> <dependency> + <groupId>org.apache.carbondata</groupId> + <artifactId>carbondata-bloom</artifactId> + <version>${project.version}</version> + <scope>test</scope> + </dependency> + <dependency> <groupId>org.apache.spark</groupId> <artifactId>spark-hive-thriftserver_${scala.binary.version}</artifactId> </dependency> http://git-wip-us.apache.org/repos/asf/carbondata/blob/0668e7d7/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala ---------------------------------------------------------------------- diff --git a/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala new file mode 100644 index 0000000..33de06f --- /dev/null +++ b/integration/spark2/src/test/scala/org/apache/carbondata/datamap/bloom/BloomCoarseGrainDataMapSuite.scala @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.datamap.bloom + +import java.io.{File, PrintWriter} +import java.util.UUID + +import scala.util.Random + +import org.apache.spark.sql.test.util.QueryTest +import org.scalatest.BeforeAndAfterAll + +class BloomCoarseGrainDataMapSuite extends QueryTest with BeforeAndAfterAll { + val inputFile = s"$resourcesPath/bloom_datamap_input.csv" + val normalTable = "carbon_normal" + val bloomDMSampleTable = "carbon_bloom" + val dataMapName = "bloom_dm" + val lineNum = 500000 + + override protected def beforeAll(): Unit = { + createFile(inputFile, line = lineNum, start = 0) + sql(s"DROP TABLE IF EXISTS $normalTable") + sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable") + } + + test("test bloom datamap") { + sql( + s""" + | CREATE TABLE $normalTable(id INT, name STRING, city STRING, age INT, + | s1 STRING, s2 STRING, s3 STRING, s4 STRING, s5 STRING, s6 STRING, s7 STRING, s8 STRING) + | STORED BY 'carbondata' TBLPROPERTIES('table_blocksize'='128') + | """.stripMargin) + sql( + s""" + | CREATE TABLE $bloomDMSampleTable(id INT, name STRING, city STRING, age INT, + | s1 STRING, s2 STRING, s3 STRING, s4 STRING, s5 STRING, s6 STRING, s7 STRING, s8 STRING) + | STORED BY 'carbondata' TBLPROPERTIES('table_blocksize'='128') + | """.stripMargin) + sql( + s""" + | CREATE DATAMAP $dataMapName ON TABLE $bloomDMSampleTable + | USING '${classOf[BloomCoarseGrainDataMapFactory].getName}' + | DMProperties('BLOOM_COLUMNS'='city,id', 'BLOOM_SIZE'='640000') + """.stripMargin) + + sql( + s""" + | LOAD DATA LOCAL INPATH '$inputFile' INTO TABLE $normalTable + | OPTIONS('header'='false') + """.stripMargin) + sql( + s""" + | LOAD DATA LOCAL INPATH '$inputFile' INTO TABLE $bloomDMSampleTable + | OPTIONS('header'='false') + """.stripMargin) + + sql(s"show datamap on table $bloomDMSampleTable").show(false) + sql(s"select * from $bloomDMSampleTable where city = 'city_5'").show(false) + sql(s"select * from $bloomDMSampleTable limit 5").show(false) + + checkExistence(sql(s"show datamap on table $bloomDMSampleTable"), true, dataMapName) +// checkAnswer(sql(s"show datamap on table $bloomDMSampleTable"), +// Row(dataMapName, classOf[BloomCoarseGrainDataMapFactory].getName, "(NA)")) + checkAnswer(sql(s"select * from $bloomDMSampleTable where id = 1"), + sql(s"select * from $normalTable where id = 1")) + checkAnswer(sql(s"select * from $bloomDMSampleTable where id = 999"), + sql(s"select * from $normalTable where id = 999")) + checkAnswer(sql(s"select * from $bloomDMSampleTable where city = 'city_1'"), + sql(s"select * from $normalTable where city = 'city_1'")) + checkAnswer(sql(s"select * from $bloomDMSampleTable where city = 'city_999'"), + sql(s"select * from $normalTable where city = 'city_999'")) + checkAnswer(sql(s"select count(distinct id), count(distinct name), count(distinct city)," + + s" count(distinct s1), count(distinct s2) from $bloomDMSampleTable"), + sql(s"select count(distinct id), count(distinct name), count(distinct city)," + + s" count(distinct s1), count(distinct s2) from $normalTable")) + checkAnswer(sql(s"select min(id), max(id), min(name), max(name), min(city), max(city)" + + s" from $bloomDMSampleTable"), + sql(s"select min(id), max(id), min(name), max(name), min(city), max(city)" + + s" from $normalTable")) + } + + // todo: will add more tests on bloom datamap, such as exception, delete datamap, show profiler + + override protected def afterAll(): Unit = { + deleteFile(inputFile) + sql(s"DROP TABLE IF EXISTS $normalTable") + sql(s"DROP TABLE IF EXISTS $bloomDMSampleTable") + } + + private def createFile(fileName: String, line: Int = 10000, start: Int = 0) = { + if (!new File(fileName).exists()) { + val write = new PrintWriter(new File(fileName)) + for (i <- start until (start + line)) { + write.println( + s"$i,n$i,city_$i,${ Random.nextInt(80) }," + + s"${ UUID.randomUUID().toString },${ UUID.randomUUID().toString }," + + s"${ UUID.randomUUID().toString },${ UUID.randomUUID().toString }," + + s"${ UUID.randomUUID().toString },${ UUID.randomUUID().toString }," + + s"${ UUID.randomUUID().toString },${ UUID.randomUUID().toString }") + } + write.close() + } + } + + private def deleteFile(fileName: String): Unit = { + val file = new File(fileName) + if (file.exists()) { + file.delete() + } + } +}
