Tanel Kiis created SPARK-38282:
----------------------------------

             Summary: Avoid duplicating complex partitioning expressions
                 Key: SPARK-38282
                 URL: https://issues.apache.org/jira/browse/SPARK-38282
             Project: Spark
          Issue Type: Improvement
          Components: SQL
    Affects Versions: 3.3.0
            Reporter: Tanel Kiis



{code}

  // Regression test for SPARK-38282: both window functions below partition by the
  // same non-trivial expression (`id % 2`), and the final assertion expects the
  // executed plan to contain exactly one WindowExec for either query shape.
  test("SPARK-38282: Avoid duplicating complex partitioning expressions") {
    // Complex (non-attribute) partitioning expression shared by both windows.
    val group = functions.col("id") % 2
    val min = functions.min("id").over(Window.partitionBy(group))
    val max = functions.max("id").over(Window.partitionBy(group))

    // Shape 1: both window functions are used inside a single expression.
    val df1 = spark.range(1, 4)
      .withColumn("ratio", max / min)

    // Shape 2: each window function is first added as its own column and the
    // ratio is computed afterwards from those columns.
    val df2 = spark.range(1, 4)
      .withColumn("min", min)
      .withColumn("max", max)
      .select(col("id"), (col("max") / col("min")).as("ratio"))

    Seq(df1, df2).foreach { df =>
      // Odd ids {1, 3} share a partition: max / min = 3 / 1 = 3.0.
      // The even id {2} is alone in its partition: 2 / 2 = 1.0.
      checkAnswer(
        df,
        Seq(Row(1L, 3.0), Row(2L, 1.0), Row(3L, 3.0)))

      // Both window functions use the same partitioning, so only one window
      // operator is expected in the physical plan.
      val windows = collect(df.queryExecution.executedPlan) {
        case w: WindowExec => w
      }
      assert(windows.size == 1)
    }
  }
{code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to