[ https://issues.apache.org/jira/browse/SPARK-38282?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Tanel Kiis updated SPARK-38282: ------------------------------- Description: {code} test("SPARK-XXXXX: Avoid duplicating complex partitioning expressions") { val group = functions.col("id") % 2 val min = functions.min("id").over(Window.partitionBy(group)) val max = functions.max("id").over(Window.partitionBy(group)) val df1 = spark.range(1, 4) .withColumn("ratio", max / min) val df2 = spark.range(1, 4) .withColumn("min", min) .withColumn("max", max) .select(col("id"), (col("max") / col("min")).as("ratio")) Seq(df1, df2).foreach { df => checkAnswer( df, Seq(Row(1L, 3.0), Row(2L, 1.0), Row(3L, 3.0))) val windows = collect(df.queryExecution.executedPlan) { case w: WindowExec => w } assert(windows.size == 1) } } {code} was: {code} test("SPARK-XXXXX: Avoid duplicating complex partitioning expressions") { val group = functions.col("id") % 2 val min = functions.min("id").over(Window.partitionBy(group)) val max = functions.max("id").over(Window.partitionBy(group)) val df1 = spark.range(1, 4) .withColumn("ratio", max / min) val df2 = spark.range(1, 4) .withColumn("min", min) .withColumn("max", max) .select(col("id"), (col("max") / col("min")).as("ratio")) Seq(df1, df2).foreach { df => checkAnswer( df, Seq(Row(1L, 3.0), Row(2L, 1.0), Row(3L, 3.0))) val windows = collect(df.queryExecution.executedPlan) { case w: WindowExec => w } assert(windows.size == 1) } } {code} > Avoid duplicating complex partitioning expressions > -------------------------------------------------- > > Key: SPARK-38282 > URL: https://issues.apache.org/jira/browse/SPARK-38282 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 3.3.0 > Reporter: Tanel Kiis > Priority: Major > > {code} > test("SPARK-XXXXX: Avoid duplicating complex partitioning expressions") { > val group = functions.col("id") % 2 > val min = functions.min("id").over(Window.partitionBy(group)) > val max = functions.max("id").over(Window.partitionBy(group)) > val df1 = spark.range(1, 4) > .withColumn("ratio", max 
/ min) > val df2 = spark.range(1, 4) > .withColumn("min", min) > .withColumn("max", max) > .select(col("id"), (col("max") / col("min")).as("ratio")) > Seq(df1, df2).foreach { df => > checkAnswer( > df, > Seq(Row(1L, 3.0), Row(2L, 1.0), Row(3L, 3.0))) > val windows = collect(df.queryExecution.executedPlan) { > case w: WindowExec => w > } > assert(windows.size == 1) > } > } > {code} -- This message was sent by Atlassian Jira (v8.20.1#820001) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org