leanken commented on a change in pull request #29983:
URL: https://github.com/apache/spark/pull/29983#discussion_r503069104
##########
File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
##########
@@ -59,56 +60,115 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton
}
test("windowing.q -- 15. testExpressions") {
- // Moved because:
- // - Spark uses a different default stddev (sample instead of pop)
- // - Tiny numerical differences in stddev results.
- // - Different StdDev behavior when n=1 (NaN instead of 0)
- checkAnswer(sql(s"""
- |select p_mfgr,p_name, p_size,
- |rank() over(distribute by p_mfgr sort by p_name) as r,
- |dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
- |cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
- |percent_rank() over(distribute by p_mfgr sort by p_name) as pr,
- |ntile(3) over(distribute by p_mfgr sort by p_name) as nt,
- |count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
- |avg(p_size) over(distribute by p_mfgr sort by p_name) as avg,
- |stddev(p_size) over(distribute by p_mfgr sort by p_name) as st,
- |first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv,
- |last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv,
- |first_value(p_size) over w1 as fvW1
- |from part
- |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name
- | rows between 2 preceding and 2 following)
+ withSQLConf(SQLConf.LEGACY_CENTRAL_MOMENT_AGG.key -> "true") {
Review comment:
done.
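For context, `withSQLConf` (from `SQLTestUtils`) overrides the given SQL config only for the duration of the block and restores the previous value afterwards, so the legacy expectations stay scoped to this one test. A minimal sketch of the pattern, reusing the config name from the diff above (the suite name and query are simplified placeholders, not the real test):

    import org.apache.spark.sql.{QueryTest, Row}
    import org.apache.spark.sql.hive.test.TestHiveSingleton
    import org.apache.spark.sql.internal.SQLConf
    import org.apache.spark.sql.test.SQLTestUtils

    class LegacyStddevSketchSuite extends QueryTest
      with SQLTestUtils with TestHiveSingleton {

      test("stddev over a single row under the legacy flag") {
        // The config override applies only inside this block.
        withSQLConf(SQLConf.LEGACY_CENTRAL_MOMENT_AGG.key -> "true") {
          // Legacy behavior per this PR: sample stddev of one row is NaN, not null.
          checkAnswer(
            sql("SELECT stddev(col) FROM VALUES (1.0) AS t(col)"),
            Row(Double.NaN))
        }
      }
    }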
##########
File path: sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala
##########
@@ -59,56 +60,115 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton
}
test("windowing.q -- 15. testExpressions") {
- // Moved because:
- // - Spark uses a different default stddev (sample instead of pop)
- // - Tiny numerical differences in stddev results.
- // - Different StdDev behavior when n=1 (NaN instead of 0)
- checkAnswer(sql(s"""
- |select p_mfgr,p_name, p_size,
- |rank() over(distribute by p_mfgr sort by p_name) as r,
- |dense_rank() over(distribute by p_mfgr sort by p_name) as dr,
- |cume_dist() over(distribute by p_mfgr sort by p_name) as cud,
- |percent_rank() over(distribute by p_mfgr sort by p_name) as pr,
- |ntile(3) over(distribute by p_mfgr sort by p_name) as nt,
- |count(p_size) over(distribute by p_mfgr sort by p_name) as ca,
- |avg(p_size) over(distribute by p_mfgr sort by p_name) as avg,
- |stddev(p_size) over(distribute by p_mfgr sort by p_name) as st,
- |first_value(p_size % 5) over(distribute by p_mfgr sort by p_name) as fv,
- |last_value(p_size) over(distribute by p_mfgr sort by p_name) as lv,
- |first_value(p_size) over w1 as fvW1
- |from part
- |window w1 as (distribute by p_mfgr sort by p_mfgr, p_name
- | rows between 2 preceding and 2 following)
+ withSQLConf(SQLConf.LEGACY_CENTRAL_MOMENT_AGG.key -> "true") {
+ // Moved because:
+ // - Spark uses a different default stddev (sample instead of pop)
+ // - Tiny numerical differences in stddev results.
+ // - Different StdDev behavior when n=1 (NaN instead of 0)
Review comment:
done
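As the moved comment notes, with the flag off (the new default) a one-row sample stddev evaluates to null rather than NaN, which is why the Hive-derived expectations only line up with the legacy flag on. A hedged spark-shell illustration of the two settings, assuming a running `spark` session and the config name from this diff:

    import org.apache.spark.sql.internal.SQLConf

    // New default: one-row sample stddev is NULL.
    spark.conf.set(SQLConf.LEGACY_CENTRAL_MOMENT_AGG.key, "false")
    spark.sql("SELECT stddev(col) FROM VALUES (1.0) AS t(col)").show()

    // Legacy behavior: one-row sample stddev is NaN.
    spark.conf.set(SQLConf.LEGACY_CENTRAL_MOMENT_AGG.key, "true")
    spark.sql("SELECT stddev(col) FROM VALUES (1.0) AS t(col)").show()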
##########
File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala
##########
@@ -174,7 +175,9 @@ case class StddevSamp(child: Expression) extends CentralMomentAgg(child) {
override val evaluateExpression: Expression = {
If(n === 0.0, Literal.create(null, DoubleType),
- If(n === 1.0, Double.NaN, sqrt(m2 / (n - 1.0))))
+ If(n === 1.0,
+ if (SQLConf.get.legacyCentralMomentAgg) Double.NaN else Literal.create(null, DoubleType),
Review comment:
done
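The hunk above is cut off after the flag-dependent branch; reconstructing from the removed line, the full expression presumably keeps the unchanged sqrt branch for n > 1, along these lines (a sketch based on this diff, not the exact committed code):

    // null for n = 0; for n = 1, NaN under the legacy config and null
    // otherwise; the usual sample stddev sqrt(M2 / (n - 1)) for n > 1.
    override val evaluateExpression: Expression = {
      If(n === 0.0, Literal.create(null, DoubleType),
        If(n === 1.0,
          if (SQLConf.get.legacyCentralMomentAgg) Double.NaN
          else Literal.create(null, DoubleType),
          sqrt(m2 / (n - 1.0))))
    }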
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]