Github user xubo245 commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/1857#discussion_r166269467
  
    --- Diff: 
integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/preaggregate/TestPreAggregateLoad.scala
 ---
    @@ -412,8 +430,467 @@ test("check load and select for avg double datatype") 
{
         sql(s"LOAD DATA LOCAL INPATH '$testData' into table maintable")
         sql(s"LOAD DATA LOCAL INPATH '$testData' into table maintable")
         val rows = sql("select age,avg(age) from maintable group by 
age").collect()
    -    sql("create datamap maintbl_douoble on table maintable using 
'preaggregate' as select avg(age) from maintable group by age")
    +    sql("create datamap maintbl_double on table maintable using 
'preaggregate' as select avg(age) from maintable group by age")
         checkAnswer(sql("select age,avg(age) from maintable group by age"), 
rows)
    +    sql("drop table if exists maintable ")
    +  }
    +
    +  def testFunction(): Unit = {
    +    // check answer
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
    +      Seq(Row(1, 31), Row(2, 27), Row(3, 70), Row(4, 55)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
    +      Seq(Row(1, 31, 1), Row(2, 27, 1), Row(3, 70, 2), Row(4, 55, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
    +      Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
    +      Seq(Row(1, 31), Row(2, 27), Row(3, 35), Row(4, 26)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
    +      Seq(Row(1, 31), Row(2, 27), Row(3, 35), Row(4, 29)))
    +
    +    // check select and match or not match pre-aggregate table
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, AVG(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT id, AVG(age) from main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    // sub query should match pre-aggregate table
    +    checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT AVG(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT AVG(age) from main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +  }
    +
    +  test("test load into main table with pre-aggregate table: double") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age DOUBLE)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +
    +    createAllAggregateTables("main_table")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    testFunction()
    +  }
    +
    +  test("test load into main table with pre-aggregate table: short") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age SHORT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    testFunction()
    +  }
    +
    +  test("test load into main table with pre-aggregate table: float") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age FLOAT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    testFunction()
    +  }
    +
    +  test("test load into main table with pre-aggregate table: bigint") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age BIGINT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    testFunction()
    +
    +  }
    +
    +  test("test load into main table with pre-aggregate table: string") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age STRING)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table")
    +
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
    +      Seq(Row(1, 31), Row(2, 27), Row(3, 70), Row(4, 55)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
    +      Seq(Row(1, 31, 1), Row(2, 27, 1), Row(3, 70, 2), Row(4, 55, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
    +      Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
    +      Seq(Row(1, "31"), Row(2, "27"), Row(3, "35"), Row(4, "26")))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
    +      Seq(Row(1, "31"), Row(2, "27"), Row(3, "35"), Row(4, "29")))
    +
    +    // check select and match or not match pre-aggregate table
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, AVG(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT id, AVG(age) from main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT id, COUNT(age) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT id, MIN(age) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT id, MAX(age) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    // sub query should match pre-aggregate table
    +    checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT SUM(age) FROM main_table"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT AVG(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT AVG(age) from main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT COUNT(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT MIN(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT MAX(age) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +  }
    +
    +  test("test load into main table with pre-aggregate table: sum string 
column") {
    +    sql(
    +      """
    +        | CREATE TABLE main_table(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age STRING)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    createAllAggregateTables("main_table", "name")
    +    sql(s"LOAD DATA LOCAL INPATH '$testData' INTO TABLE main_table")
    +
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_sum"),
    +      Seq(Row(1, null), Row(2, null), Row(3, null), Row(4, null)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_avg"),
    +      Seq(Row(1, null, 0), Row(2, null, 0), Row(3, null, 0), Row(4, null, 
0)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_count"),
    +      Seq(Row(1, 1), Row(2, 1), Row(3, 2), Row(4, 2)))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_min"),
    +      Seq(Row(1, "david"), Row(2, "eason"), Row(3, "jarry"), Row(4, 
"kunal")))
    +    checkAnswer(sql(s"SELECT * FROM main_table_preagg_max"),
    +      Seq(Row(1, "david"), Row(2, "eason"), Row(3, "jarry"), Row(4, 
"vishal")))
    +
    +    // check select and match or not match pre-aggregate table
    +    checkPreAggTable(sql("SELECT id, SUM(name) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT id, SUM(name) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, AVG(name) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT id, AVG(name) from main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, COUNT(name) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT id, COUNT(name) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MIN(name) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT id, MIN(name) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT id, MAX(name) FROM main_table GROUP BY 
id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT id, MAX(name) FROM main_table GROUP BY 
id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    // sub query should match pre-aggregate table
    +    checkPreAggTable(sql("SELECT SUM(name) FROM main_table"),
    +      true, "main_table_preagg_sum")
    +    checkPreAggTable(sql("SELECT SUM(name) FROM main_table"),
    +      false, "main_table_preagg_avg", "main_table")
    +
    +    checkPreAggTable(sql("SELECT AVG(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_avg")
    +    checkPreAggTable(sql("SELECT AVG(name) from main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT COUNT(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_count")
    +    checkPreAggTable(sql("SELECT COUNT(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MIN(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_min")
    +    checkPreAggTable(sql("SELECT MIN(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +
    +    checkPreAggTable(sql("SELECT MAX(name) FROM main_table GROUP BY id"),
    +      true, "main_table_preagg_max")
    +    checkPreAggTable(sql("SELECT MAX(name) FROM main_table GROUP BY id"),
    +      false, "main_table_preagg_sum", "main_table")
    +  }
    +
    +  test("test whether all segments are loaded into pre-aggregate table if 
segments are set on main table 2") {
    +    sql("DROP TABLE IF EXISTS segmaintable")
    +    sql(
    +      """
    +        | CREATE TABLE segmaintable(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age INT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    sql("set carbon.input.segments.default.segmaintable=0")
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +    sql(
    +      s"""
    +         | CREATE DATAMAP preagg_sum
    +         | ON TABLE segmaintable
    +         | USING 'preaggregate'
    +         | AS SELECT id, SUM(age)
    +         | FROM segmaintable
    +         | GROUP BY id
    +       """.stripMargin)
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY 
id"),
    +      false, "segmaintable_preagg_sum")
    +
    +    sql("reset")
    +    checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 
26)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY 
id"),
    +      true, "segmaintable_preagg_sum")
    +  }
    +
    +
    +  test("test whether all segments are loaded into pre-aggregate table if 
segments are set on main table 3") {
    +    sql("DROP TABLE IF EXISTS segmaintable")
    +    sql(
    +      """
    +        | CREATE TABLE segmaintable(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age INT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    sql("set carbon.input.segments.default.segmaintable=0")
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +    sql(
    +      s"""
    +         | CREATE DATAMAP preagg_sum
    +         | ON TABLE segmaintable
    +         | USING 'preaggregate'
    +         | AS SELECT id, SUM(age)
    +         | FROM segmaintable
    +         | GROUP BY id
    +       """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    sql("reset")
    +    checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 
26), Row(1, 26)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY 
id"),
    +      true, "segmaintable_preagg_sum")
    +  }
    +
    +  test("test whether all segments are loaded into pre-aggregate table if 
segments are set on main table 4") {
    +    sql("DROP TABLE IF EXISTS segmaintable")
    +    sql(
    +      """
    +        | CREATE TABLE segmaintable(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age INT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    //  check value before set segments
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 52)))
    +
    +    sql("set carbon.input.segments.default.segmaintable=0")
    +    //  check value after set segments
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +
    +    sql(
    +      s"""
    +         | CREATE DATAMAP preagg_sum
    +         | ON TABLE segmaintable
    +         | USING 'preaggregate'
    +         | AS SELECT id, SUM(age)
    +         | FROM segmaintable
    +         | GROUP BY id
    +       """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 
52), Row(1, 26)))
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY 
id"),
    +      false, "segmaintable_preagg_sum")
    +
    +    // reset
    +    sql("reset")
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 78)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY 
id"),
    +      true, "segmaintable_preagg_sum")
    +  }
    +
    +
    +  test("test whether all segments are loaded into pre-aggregate table if 
segments are set on main table 5") {
    +    sql("DROP TABLE IF EXISTS segmaintable")
    +    sql(
    +      """
    +        | CREATE TABLE segmaintable(
    +        |     id INT,
    +        |     name STRING,
    +        |     city STRING,
    +        |     age INT)
    +        | STORED BY 'org.apache.carbondata.format'
    +      """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    //  check value before set segments
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 52)))
    +
    +    sql("set carbon.input.segments.default.segmaintable=0")
    +    //  check value after set segments
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +
    +    sql(
    +      s"""
    +         | CREATE DATAMAP preagg_sum
    +         | ON TABLE segmaintable
    +         | USING 'preaggregate'
    +         | AS SELECT id, SUM(age)
    +         | FROM segmaintable
    +         | GROUP BY id
    +       """.stripMargin)
    +    sql(s"INSERT INTO segmaintable VALUES(1, 'xyz', 'bengaluru', 26)")
    +
    +    checkAnswer(sql("SELECT * FROM segmaintable_preagg_sum"), Seq(Row(1, 
52), Row(1, 26)))
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 26)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY 
id"),
    +      false, "segmaintable_preagg_sum")
    +
    +    // set *
    +    sql("set carbon.input.segments.default.segmaintable=*")
    +    checkAnswer(sql(s"SELECT id, SUM(age) FROM segmaintable GROUP BY id"),
    +      Seq(Row(1, 78)))
    +    checkPreAggTable(sql("SELECT id, SUM(age) FROM segmaintable GROUP BY 
id"),
    +      false, "segmaintable_preagg_sum")
    --- End diff --
    
    we should support match pre aggregate table when sql("set 
carbon.input.segments.default.segmaintable=*")


---

Reply via email to