Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/21052#discussion_r183190221 --- Diff: sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala --- @@ -382,4 +382,34 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } } + + test("Simple queries must be working, if CBO is turned on") { + withSQLConf(("spark.sql.cbo.enabled", "true")) { + withTable("TBL1", "TBL") { + import org.apache.spark.sql.functions._ + val df = spark.range(1000L).select('id, + 'id * 2 as "FLD1", + 'id * 12 as "FLD2", + lit("aaa") + 'id as "fld3") + df.write + .mode(SaveMode.Overwrite) + .bucketBy(10, "id", "FLD1", "FLD2") + .sortBy("id", "FLD1", "FLD2") + .saveAsTable("TBL") + spark.sql("ANALYZE TABLE TBL COMPUTE STATISTICS ") + spark.sql("ANALYZE TABLE TBL COMPUTE STATISTICS FOR COLUMNS ID, FLD1, FLD2, FLD3") + val df2 = spark.sql( + """ + SELECT t1.id, t1.fld1, t1.fld2, t1.fld3 + FROM tbl t1 + JOIN tbl t2 on t1.id=t2.id + WHERE t1.fld3 IN (-123.23,321.23) + """.stripMargin) + df2.createTempView("TBL2") + spark.sql("SELECT * FROM tbl2 WHERE fld3 IN ('qqq', 'qwe') ").explain() + } + } + --- End diff -- nit: drop this line
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org