Github user xubo245 commented on a diff in the pull request:
https://github.com/apache/carbondata/pull/1857#discussion_r166269467
--- Diff:
integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggregateLoad.scala
---
@@ -412,8 +430,467 @@ test("check load and select for avg double datatype")
{
sql(s"LOAD DATA LOCAL INPATH '$testData' into table maintable")
sql(s"LOAD DATA LOCAL INPATH '$testData' into table maintable")
val rows = sql("select age,avg(age) from maintable group by
age").collect()
- sql("create datamap maintbl_douoble on table maintable using
'preaggregate' as select avg(age) from maintable group by age")
+ sql("create datamap maintbl_double on table maintable using
'preaggregate' as select avg(age) from maintable group by age")
checkAnswer(sql("select age,avg(age) from maintable group by age"),
rows)
+ sql("drop table if exists maintable ")
+ }
+
+ def testFunction(): Unit = {
+ // check answer
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
+ Seq(Row(1, 31), Row(2, 27), Row(3, 70), Row(4, 55)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
+ Seq(Row(1, 31, 1), Row(2, 27, 1), Row(3, 70, 2), Row(4, 55, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
+ Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
+ Seq(Row(1, 31), Row(2, 27), Row(3, 35), Row(4, 26)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
+ Seq(Row(1, 31), Row(2, 27), Row(3, 35), Row(4, 29)))
+
+ // check select and match or not match pre-aggregate table
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT id, AVG(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT id, AVG(age) from main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ // sub query should match pre-aggregate table
+ checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT AVG(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT AVG(age) from main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+ }
+
+ test("test load into main table with pre-aggregate table: double") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age DOUBLE)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+
+ createAllAggregateTables("main_table")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ testFunction()
+ }
+
+ test("test load into main table with pre-aggregate table: short") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age SHORT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ testFunction()
+ }
+
+ test("test load into main table with pre-aggregate table: float") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age FLOAT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ testFunction()
+ }
+
+ test("test load into main table with pre-aggregate table: bigint") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age BIGINT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ testFunction()
+
+ }
+
+ test("test load into main table with pre-aggregate table: string") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age STRING)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table")
+
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
+ Seq(Row(1, 31), Row(2, 27), Row(3, 70), Row(4, 55)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
+ Seq(Row(1, 31, 1), Row(2, 27, 1), Row(3, 70, 2), Row(4, 55, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
+ Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
+ Seq(Row(1, "31"), Row(2, "27"), Row(3, "35"), Row(4, "26")))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
+ Seq(Row(1, "31"), Row(2, "27"), Row(3, "35"), Row(4, "29")))
+
+ // check select and match or not match pre-aggregate table
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT id, AVG(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT id, AVG(age) from main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ // sub query should match pre-aggregate table
+ checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT AVG(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT AVG(age) from main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+ }
+
+ test("test load into main table with pre-aggregate table: sum string
column") {
+ sql(
+ """
+ | CREATE TABLE main_table(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age STRING)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ createAllAggregateTables("main_table", "name")
+ sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
+
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
+ Seq(Row(1, null), Row(2, null), Row(3, null), Row(4, null)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
+ Seq(Row(1, null, 0), Row(2, null, 0), Row(3, null, 0), Row(4, null,
0)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
+ Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
+ Seq(Row(1, "david"), Row(2, "eason"), Row(3, "jarry"), Row(4,
"kunal")))
+ checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
+ Seq(Row(1, "david"), Row(2, "eason"), Row(3, "jarry"), Row(4,
"vishal")))
+
+ // check select and match or not match pre-aggregate table
+ checkPreAggTable(sql("SELECT id, SUM(name) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT id, SUM(name) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT id, AVG(name) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT id, AVG(name) from main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, COUNT(name) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT id, COUNT(name) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MIN(name) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT id, MIN(name) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT id, MAX(name) FROM main_table GROUP BY
id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT id, MAX(name) FROM main_table GROUP BY
id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ // sub query should match pre-aggregate table
+ checkPreAggTable(sql("SELECT SUM(name) FROM main_table"),
+ true, "main_table_preagg_sum")
+ checkPreAggTable(sql("SELECT SUM(name) FROM main_table"),
+ false, "main_table_preagg_avg", "main_table")
+
+ checkPreAggTable(sql("SELECT AVG(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_avg")
+ checkPreAggTable(sql("SELECT AVG(name) from main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT COUNT(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_count")
+ checkPreAggTable(sql("SELECT COUNT(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MIN(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_min")
+ checkPreAggTable(sql("SELECT MIN(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+
+ checkPreAggTable(sql("SELECT MAX(name) FROM main_table GROUP BY id"),
+ true, "main_table_preagg_max")
+ checkPreAggTable(sql("SELECT MAX(name) FROM main_table GROUP BY id"),
+ false, "main_table_preagg_sum", "main_table")
+ }
+
+ test("test whether all segments are loaded into pre-aggregate table if
segments are set on main table 2") {
+ sql("DROP TABLE IF EXISTS segmaintable")
+ sql(
+ """
+ | CREATE TABLE segmaintable(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age INT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ sql("set carbon.input.segments.default.segmaintable=0")
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+ sql(
+ s"""
+ | CREATE DATAMAP preagg_sum
+ | ON TABLE segmaintable
+ | USING 'preaggregate'
+ | AS SELECT id, SUM(age)
+ | FROM segmaintable
+ | GROUP BY id
+ """.stripMargin)
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY
id"),
+ false, "segmaintable_preagg_sum")
+
+ sql("reset")
+ checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1,
26)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY
id"),
+ true, "segmaintable_preagg_sum")
+ }
+
+
+ test("test whether all segments are loaded into pre-aggregate table if
segments are set on main table 3") {
+ sql("DROP TABLE IF EXISTS segmaintable")
+ sql(
+ """
+ | CREATE TABLE segmaintable(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age INT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ sql("set carbon.input.segments.default.segmaintable=0")
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+ sql(
+ s"""
+ | CREATE DATAMAP preagg_sum
+ | ON TABLE segmaintable
+ | USING 'preaggregate'
+ | AS SELECT id, SUM(age)
+ | FROM segmaintable
+ | GROUP BY id
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ sql("reset")
+ checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1,
26), Row(1, 26)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY
id"),
+ true, "segmaintable_preagg_sum")
+ }
+
+ test("test whether all segments are loaded into pre-aggregate table if
segments are set on main table 4") {
+ sql("DROP TABLE IF EXISTS segmaintable")
+ sql(
+ """
+ | CREATE TABLE segmaintable(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age INT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ // check value before set segments
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 52)))
+
+ sql("set carbon.input.segments.default.segmaintable=0")
+ // check value after set segments
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+
+ sql(
+ s"""
+ | CREATE DATAMAP preagg_sum
+ | ON TABLE segmaintable
+ | USING 'preaggregate'
+ | AS SELECT id, SUM(age)
+ | FROM segmaintable
+ | GROUP BY id
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1,
52), Row(1, 26)))
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY
id"),
+ false, "segmaintable_preagg_sum")
+
+ // reset
+ sql("reset")
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 78)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY
id"),
+ true, "segmaintable_preagg_sum")
+ }
+
+
+ test("test whether all segments are loaded into pre-aggregate table if
segments are set on main table 5") {
+ sql("DROP TABLE IF EXISTS segmaintable")
+ sql(
+ """
+ | CREATE TABLE segmaintable(
+ | id INT,
+ | name STRING,
+ | city STRING,
+ | age INT)
+ | STORED BY 'org.apache.carbondata.format'
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ // check value before set segments
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 52)))
+
+ sql("set carbon.input.segments.default.segmaintable=0")
+ // check value after set segments
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+
+ sql(
+ s"""
+ | CREATE DATAMAP preagg_sum
+ | ON TABLE segmaintable
+ | USING 'preaggregate'
+ | AS SELECT id, SUM(age)
+ | FROM segmaintable
+ | GROUP BY id
+ """.stripMargin)
+ sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
+
+ checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1,
52), Row(1, 26)))
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 26)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY
id"),
+ false, "segmaintable_preagg_sum")
+
+ // set *
+ sql("set carbon.input.segments.default.segmaintable=*")
+ checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
+ Seq(Row(1, 78)))
+ checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY
id"),
+ false, "segmaintable_preagg_sum")
--- End diff --
we should support match pre aggregate table when sql("set
carbon.input.segments.default.segmaintable=*")
---