Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20224#discussion_r163449472

    --- Diff: sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala ---
    @@ -228,4 +229,21 @@ class WholeStageCodegenSuite extends QueryTest with SharedSQLContext {
           }
         }
       }
    +
    +  test("including codegen stage ID in generated class name should not regress codegen caching") {
    +    import testImplicits._
    +
    +    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_USE_ID_IN_CLASS_NAME.key -> "true") {
    +      val bytecodeSizeHisto = CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE
    +      spark.range(3).select('id + 2).collect
    +      val after1 = bytecodeSizeHisto.getCount
    +      spark.range(3).select('id + 2).collect
    +      val after2 = bytecodeSizeHisto.getCount // same query shape as above, deliberately
    +      assert(after1 == after2, "the same query run twice should hit the codegen cache")
    +
    +      spark.range(5).select('id * 2).collect
    +      val after3 = bytecodeSizeHisto.getCount
    +      assert(after3 >= after2, "a different query can result in codegen cache miss, that's okay")
    --- End diff --

    `after3 > after2`?
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org