zhengruifeng commented on code in PR #38395:
URL: https://github.com/apache/spark/pull/38395#discussion_r1010073193
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala:
##########
@@ -2101,3 +2101,53 @@ object AsOfJoin {
}
}
}
+
+
+/**
+ * A logical plan for summary.
+ */
+case class UnresolvedSummary(
+ child: LogicalPlan,
+ statistics: Seq[String]) extends UnaryNode {
+
+ private lazy val supported =
+ Set("count", "count_distinct", "approx_count_distinct", "mean", "stddev",
"min", "max")
+
+ {
+ // TODO: throw AnalysisException instead
+ require(statistics.nonEmpty)
+ val percentiles = statistics.filter(p => p.endsWith("%")).map { p =>
+ try {
+ p.stripSuffix("%").toDouble / 100.0
+ } catch {
+ case e: NumberFormatException =>
+ throw QueryExecutionErrors.cannotParseStatisticAsPercentileError(p,
e)
+ }
+ }
+ require(percentiles.forall(p => p >= 0 && p <= 1), "Percentiles must be in
the range [0, 1]")
+
+ statistics.foreach {
+ case s if supported.contains(s) =>
+ case p if p.endsWith("%") =>
+ case s => throw QueryExecutionErrors.statisticNotRecognizedError(s)
+ }
+ }
+
+ override protected def stringArgs: Iterator[Any] = super.stringArgs.take(5)
+
+ override lazy val resolved = false // Summary will be replaced after being
resolved.
+
+ final override val nodePatterns: Seq[TreePattern] = Seq(SUMMARY)
+
+ override def output: Seq[Attribute] = {
+ AttributeReference("summary", StringType)() +:
+ child.output.flatMap { attr =>
+ if (attr.dataType.isInstanceOf[NumericType] ||
attr.dataType.isInstanceOf[StringType]) {
+ Some(AttributeReference(attr.name, StringType)())
+ } else None
Review Comment:
This is a `flatMap`, so the `None` results are dropped. Currently, columns
whose data types are other than `NumericType` and `StringType` will be
discarded from the output.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]