[ https://issues.apache.org/jira/browse/SPARK-17739?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Herman van Hovell updated SPARK-17739: -------------------------------------- Summary: Collapse adjacent similar Window operators (was: Collapse adjacent similar Window operations.) > Collapse adjacent similar Window operators > ------------------------------------------ > > Key: SPARK-17739 > URL: https://issues.apache.org/jira/browse/SPARK-17739 > Project: Spark > Issue Type: Bug > Components: SQL > Reporter: Herman van Hovell > > Spark currently does not collapse adjacent windows with the same partitioning > and (similar) sorting. For example: > {noformat} > val df = spark.range(1000).select($"id" % 100 as "grp", $"id", rand() as > "col1", rand() as "col2") > // Add summary statistics for all columns > import org.apache.spark.sql.expressions.Window > val cols = Seq("id", "col1", "col2") > val window = Window.partitionBy($"grp").orderBy($"id") > val result = cols.foldLeft(df) { (base, name) => > base.withColumn(s"${name}_avg", avg(col(name)).over(window)) > .withColumn(s"${name}_stddev", stddev(col(name)).over(window)) > .withColumn(s"${name}_min", min(col(name)).over(window)) > .withColumn(s"${name}_max", max(col(name)).over(window)) > } > {noformat} > This leads to the following plan: > {noformat} > == Parsed Logical Plan == > 'Project [*, max('col2) windowspecdefinition('grp, 'id ASC NULLS FIRST, > UnspecifiedFrame) AS col2_max#10313] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270, col2_min#10295] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270, col2_min#10295, col2_min#10295] > +- Window [min(col2#10098) windowspecdefinition(grp#10096L, id#10093L > ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW) AS > col2_min#10295], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270, col2_stddev#10270] > +- Window [stddev_samp(col2#10098) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS col2_stddev#10270], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, > col1_avg#10176, col1_stddev#10196, col1_min#10217, col1_max#10231, > col2_avg#10246] > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, > col1_avg#10176, col1_stddev#10196, col1_min#10217, col1_max#10231, > col2_avg#10246] > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, > col1_avg#10176, col1_stddev#10196, col1_min#10217, col1_max#10231, > col2_avg#10246, col2_avg#10246] > +- Window [avg(col2#10098) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS col2_avg#10246], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, > id_max#10165L, col1_avg#10176, 
col1_stddev#10196, col1_min#10217, > col1_max#10231] > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, > id_max#10165L, col1_avg#10176, col1_stddev#10196, col1_min#10217, > col1_max#10231] > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, > id_max#10165L, col1_avg#10176, col1_stddev#10196, col1_min#10217, > col1_max#10231, col1_max#10231] > +- Window [max(col1#10097) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS col1_max#10231], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098, id_avg#10105, id_stddev#10121, > id_min#10155L, id_max#10165L, col1_avg#10176, col1_stddev#10196, > col1_min#10217] > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098, id_avg#10105, id_stddev#10121, > id_min#10155L, id_max#10165L, col1_avg#10176, col1_stddev#10196, > col1_min#10217] > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098, id_avg#10105, id_stddev#10121, > id_min#10155L, id_max#10165L, col1_avg#10176, col1_stddev#10196, > col1_min#10217, col1_min#10217] > +- Window > [min(col1#10097) windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, > RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS col1_min#10217], > [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Project > [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196] > +- Project > [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196] > +- Project > [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_stddev#10196] > +- Window > [stddev_samp(col1#10097) 
windowspecdefinition(grp#10096L, id#10093L ASC NULLS > FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS > col1_stddev#10196], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- > Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176] > +- > Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176] > +- > Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, col1_avg#10176] > > +- Window [avg(col1#10097) windowspecdefinition(grp#10096L, id#10093L ASC > NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS > col1_avg#10176], [grp#10096L], [id#10093L ASC NULLS FIRST] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, id_max#10165L] > > +- Window [max(id#10093L) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS id_max#10165L], [grp#10096L], [id#10093L ASC NULLS FIRST] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L] > > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, id_min#10155L] > > +- Window [min(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_min#10155L], [grp#10096L], > [id#10093L ASC NULLS FIRST] > > +- Project 
[grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121] > > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121] > > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, _w0#10138, id_stddev#10121, > id_stddev#10121] > > +- Window [stddev_samp(_w0#10138) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_stddev#10121], [grp#10096L], > [id#10093L ASC NULLS FIRST] > > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, cast(id#10093L as double) AS _w0#10138] > > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105] > > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098, id_avg#10105, id_avg#10105] > > +- Window [avg(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_avg#10105], [grp#10096L], > [id#10093L ASC NULLS FIRST] > > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098] > > +- Project [(id#10093L > % cast(100 as bigint)) AS grp#10096L, id#10093L, rand(-2635875643946016190) > AS col1#10097, rand(427589748603966023) AS col2#10098] > > +- Range (0, 1000, > step=1, splits=Some(1)) > == Analyzed Logical Plan == > grp: bigint, id: bigint, col1: double, col2: double, id_avg: double, > id_stddev: double, id_min: bigint, id_max: bigint, col1_avg: double, > col1_stddev: double, col1_min: double, col1_max: double, col2_avg: double, > col2_stddev: double, col2_min: double, col2_max: double > Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270, col2_min#10295, col2_max#10313] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, 
col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270, col2_min#10295, col2_max#10313, col2_max#10313] > +- Window [max(col2#10098) windowspecdefinition(grp#10096L, id#10093L ASC > NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS > col2_max#10313], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270, col2_min#10295] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270, col2_min#10295] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270, col2_min#10295, col2_min#10295] > +- Window [min(col2#10098) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS col2_min#10295], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_max#10231, col2_avg#10246, > col2_stddev#10270] > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, > col1_avg#10176, col1_stddev#10196, col1_min#10217, col1_max#10231, > col2_avg#10246, col2_stddev#10270] > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, > col1_avg#10176, col1_stddev#10196, col1_min#10217, col1_max#10231, > col2_avg#10246, 
col2_stddev#10270, col2_stddev#10270] > +- Window [stddev_samp(col2#10098) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS col2_stddev#10270], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, > col1_avg#10176, col1_stddev#10196, col1_min#10217, col1_max#10231, > col2_avg#10246] > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, > id_max#10165L, col1_avg#10176, col1_stddev#10196, col1_min#10217, > col1_max#10231, col2_avg#10246] > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, > id_max#10165L, col1_avg#10176, col1_stddev#10196, col1_min#10217, > col1_max#10231, col2_avg#10246, col2_avg#10246] > +- Window [avg(col2#10098) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS col2_avg#10246], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, > id_max#10165L, col1_avg#10176, col1_stddev#10196, col1_min#10217, > col1_max#10231] > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098, id_avg#10105, id_stddev#10121, > id_min#10155L, id_max#10165L, col1_avg#10176, col1_stddev#10196, > col1_min#10217, col1_max#10231] > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098, id_avg#10105, id_stddev#10121, > id_min#10155L, id_max#10165L, col1_avg#10176, col1_stddev#10196, > col1_min#10217, col1_max#10231, col1_max#10231] > +- Window [max(col1#10097) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS col1_max#10231], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098, id_avg#10105, 
id_stddev#10121, > id_min#10155L, id_max#10165L, col1_avg#10176, col1_stddev#10196, > col1_min#10217] > +- Project > [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217] > +- Project > [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_min#10217, col1_min#10217] > +- Window > [min(col1#10097) windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, > RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS col1_min#10217], > [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Project > [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196] > +- > Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196] > +- > Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_stddev#10196, col1_stddev#10196] > +- > Window [stddev_samp(col1#10097) windowspecdefinition(grp#10096L, id#10093L > ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS > col1_stddev#10196], [grp#10096L], [id#10093L ASC NULLS FIRST] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, id_avg#10105, > id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, col1_avg#10176, > col1_avg#10176] > > +- Window [avg(col1#10097) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) 
> AS col1_avg#10176], [grp#10096L], [id#10093L ASC NULLS FIRST] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L] > > +- Project [grp#10096L, id#10093L, col1#10097, col2#10098, > id_avg#10105, id_stddev#10121, id_min#10155L, id_max#10165L, id_max#10165L] > > +- Window [max(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_max#10165L], [grp#10096L], > [id#10093L ASC NULLS FIRST] > > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L] > > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L] > > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121, id_min#10155L, id_min#10155L] > > +- Window [min(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_min#10155L], [grp#10096L], > [id#10093L ASC NULLS FIRST] > > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, id_stddev#10121] > > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, id_stddev#10121] > > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, _w0#10138, id_stddev#10121, > id_stddev#10121] > > +- Window > [stddev_samp(_w0#10138) windowspecdefinition(grp#10096L, id#10093L ASC NULLS > FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS > id_stddev#10121], [grp#10096L], [id#10093L ASC NULLS FIRST] > > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098, id_avg#10105, cast(id#10093L as double) AS > _w0#10138] > > +- Project [grp#10096L, > id#10093L, col1#10097, col2#10098, id_avg#10105] > > +- Project [grp#10096L, > id#10093L, 
col1#10097, col2#10098, id_avg#10105, id_avg#10105] > > +- Window > [avg(id#10093L) windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, > RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS id_avg#10105], > [grp#10096L], [id#10093L ASC NULLS FIRST] > > +- Project > [grp#10096L, id#10093L, col1#10097, col2#10098] > > +- Project > [(id#10093L % cast(100 as bigint)) AS grp#10096L, id#10093L, > rand(-2635875643946016190) AS col1#10097, rand(427589748603966023) AS > col2#10098] > > +- Range > (0, 1000, step=1, splits=Some(1)) > == Optimized Logical Plan == > Window [max(col2#10098) windowspecdefinition(grp#10096L, id#10093L ASC NULLS > FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS col2_max#10313], > [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [min(col2#10098) windowspecdefinition(grp#10096L, id#10093L ASC > NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS > col2_min#10295], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [stddev_samp(col2#10098) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS col2_stddev#10270], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [avg(col2#10098) windowspecdefinition(grp#10096L, id#10093L > ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS > col2_avg#10246], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [max(col1#10097) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS col1_max#10231], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [min(col1#10097) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS col1_min#10217], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [stddev_samp(col1#10097) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS col1_stddev#10196], 
[grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Window [avg(col1#10097) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS col1_avg#10176], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [max(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_max#10165L], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Window [min(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_min#10155L], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121] > +- Window [stddev_samp(_w0#10138) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_stddev#10121], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, cast(id#10093L as double) AS _w0#10138] > +- Window [avg(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_avg#10105], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Project [(id#10093L % 100) AS > grp#10096L, id#10093L, rand(-2635875643946016190) AS col1#10097, > rand(427589748603966023) AS col2#10098] > +- Range (0, 1000, step=1, > splits=Some(1)) > == Physical Plan == > Window [max(col2#10098) windowspecdefinition(grp#10096L, id#10093L ASC NULLS > FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS col2_max#10313], > [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [min(col2#10098) windowspecdefinition(grp#10096L, id#10093L ASC > NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS > col2_min#10295], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [stddev_samp(col2#10098) windowspecdefinition(grp#10096L, > id#10093L 
ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS col2_stddev#10270], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [avg(col2#10098) windowspecdefinition(grp#10096L, id#10093L > ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS > col2_avg#10246], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [max(col1#10097) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS col1_max#10231], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [min(col1#10097) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS col1_min#10217], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [stddev_samp(col1#10097) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS col1_stddev#10196], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Window [avg(col1#10097) windowspecdefinition(grp#10096L, > id#10093L ASC NULLS FIRST, RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) > AS col1_avg#10176], [grp#10096L], [id#10093L ASC NULLS FIRST] > +- Window [max(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_max#10165L], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- Window [min(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_min#10155L], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- *Project [grp#10096L, id#10093L, col1#10097, > col2#10098, id_avg#10105, id_stddev#10121] > +- Window [stddev_samp(_w0#10138) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_stddev#10121], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- *Project [grp#10096L, id#10093L, > col1#10097, col2#10098, id_avg#10105, cast(id#10093L as 
double) AS _w0#10138] > +- Window [avg(id#10093L) > windowspecdefinition(grp#10096L, id#10093L ASC NULLS FIRST, RANGE BETWEEN > UNBOUNDED PRECEDING AND CURRENT ROW) AS id_avg#10105], [grp#10096L], > [id#10093L ASC NULLS FIRST] > +- *Sort [grp#10096L ASC NULLS FIRST, > id#10093L ASC NULLS FIRST], false, 0 > +- Exchange > hashpartitioning(grp#10096L, 200) > +- *Project [(id#10093L % 100) > AS grp#10096L, id#10093L, rand(-2635875643946016190) AS col1#10097, > rand(427589748603966023) AS col2#10098] > +- *Range (0, 1000, step=1, > splits=Some(1)) > {noformat} > I think it would be nice to add an optimizer rule which collapses these windows. -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org