This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 3d8a62d [SPARK-31562][SQL] Update ExpressionDescription for substring, current_date, and current_timestamp 3d8a62d is described below commit 3d8a62ded8a7058f9a292e402e8e6f688d1883c7 Author: Takeshi Yamamuro <yamam...@apache.org> AuthorDate: Sun Apr 26 11:46:52 2020 -0700 [SPARK-31562][SQL] Update ExpressionDescription for substring, current_date, and current_timestamp ### What changes were proposed in this pull request? This PR intends to add entries for substring, current_date, and current_timestamp in the SQL built-in function documents. Specifically, the entries are as follows; - SELECT current_date; - SELECT current_timestamp; - SELECT substring('abcd' FROM 1); - SELECT substring('abcd' FROM 1 FOR 2); ### Why are the changes needed? To make the SQL (built-in functions) references complete. ### Does this PR introduce any user-facing change? <img width="1040" alt="Screen Shot 2020-04-25 at 16 51 07" src="https://user-images.githubusercontent.com/692303/80274851-6ca5ee00-8718-11ea-9a35-9ae82008cb4b.png"> <img width="974" alt="Screen Shot 2020-04-25 at 17 24 24" src="https://user-images.githubusercontent.com/692303/80275032-a88d8300-8719-11ea-92ec-95b80169ae28.png"> <img width="862" alt="Screen Shot 2020-04-25 at 17 27 48" src="https://user-images.githubusercontent.com/692303/80275114-36696e00-871a-11ea-8e39-02e93eabb92f.png"> ### How was this patch tested? Added test examples. Closes #28342 from maropu/SPARK-31562. 
Authored-by: Takeshi Yamamuro <yamam...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> (cherry picked from commit e01125db0d351001dc971e33f4b15403f24d97b4) Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../sql/catalyst/analysis/FunctionRegistry.scala | 2 +- .../analysis/UnsupportedOperationChecker.scala | 6 +-- .../catalyst/expressions/datetimeExpressions.scala | 63 +++++++++++++++++----- .../catalyst/expressions/stringExpressions.scala | 12 ++++- .../sql/catalyst/optimizer/finishAnalysis.scala | 2 +- .../sql/expressions/ExpressionInfoSuite.scala | 3 ++ sql/gen-sql-functions-docs.py | 8 +-- 7 files changed, 74 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 6cd492f..d3b0731 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -404,7 +404,7 @@ object FunctionRegistry { expression[Month]("month"), expression[MonthsBetween]("months_between"), expression[NextDay]("next_day"), - expression[CurrentTimestamp]("now", true), + expression[Now]("now"), expression[Quarter]("quarter"), expression[Second]("second"), expression[ParseToTimestamp]("to_timestamp"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 2f8cb26..423f89f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -19,13 +19,11 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.internal.Logging import 
org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID} +import org.apache.spark.sql.catalyst.expressions.{Attribute, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID, Now} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode /** @@ -412,7 +410,7 @@ object UnsupportedOperationChecker extends Logging { subPlan.expressions.foreach { e => if (e.collectLeaves().exists { - case (_: CurrentTimestamp | _: CurrentDate) => true + case (_: CurrentTimestamp | _: Now | _: CurrentDate) => true case _ => false }) { throwError(s"Continuous processing does not support current time operations.") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index f828bd4..8f57421 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -29,7 +29,6 @@ import org.apache.commons.text.StringEscapeUtils import org.apache.spark.SparkUpgradeException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, 
TimestampFormatter} @@ -63,7 +62,21 @@ trait TimeZoneAwareExpression extends Expression { * There is no code generation since this expression should get constant folded by the optimizer. */ @ExpressionDescription( - usage = "_FUNC_() - Returns the current date at the start of query evaluation.", + usage = """ + _FUNC_() - Returns the current date at the start of query evaluation. + + _FUNC_ - Returns the current date at the start of query evaluation. + """, + examples = """ + Examples: + > SELECT _FUNC_(); + 2020-04-25 + > SELECT _FUNC_; + 2020-04-25 + """, + note = """ + The syntax without braces has been supported since 2.0.1. + """, group = "datetime_funcs", since = "1.5.0") case class CurrentDate(timeZoneId: Option[String] = None) @@ -84,6 +97,13 @@ case class CurrentDate(timeZoneId: Option[String] = None) override def prettyName: String = "current_date" } +abstract class CurrentTimestampLike() extends LeafExpression with CodegenFallback { + override def foldable: Boolean = true + override def nullable: Boolean = false + override def dataType: DataType = TimestampType + override def eval(input: InternalRow): Any = currentTimestamp() +} + /** * Returns the current timestamp at the start of query evaluation. * All calls of current_timestamp within the same query return the same value. @@ -91,19 +111,38 @@ case class CurrentDate(timeZoneId: Option[String] = None) * There is no code generation since this expression should get constant folded by the optimizer. */ @ExpressionDescription( - usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.", + usage = """ + _FUNC_() - Returns the current timestamp at the start of query evaluation. + + _FUNC_ - Returns the current timestamp at the start of query evaluation. + """, + examples = """ + Examples: + > SELECT _FUNC_(); + 2020-04-25 15:49:11.914 + > SELECT _FUNC_; + 2020-04-25 15:49:11.914 + """, + note = """ + The syntax without braces has been supported since 2.0.1. 
+ """, group = "datetime_funcs", since = "1.5.0") -case class CurrentTimestamp() extends LeafExpression with CodegenFallback { - override def foldable: Boolean = true - override def nullable: Boolean = false - - override def dataType: DataType = TimestampType - - override def eval(input: InternalRow): Any = currentTimestamp() +case class CurrentTimestamp() extends CurrentTimestampLike { + override def prettyName: String = "current_timestamp" +} - override def prettyName: String = - getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_timestamp") +@ExpressionDescription( + usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.", + examples = """ + Examples: + > SELECT _FUNC_(); + 2020-04-25 15:49:11.914 + """, + group = "datetime_funcs", + since = "1.6.0") +case class Now() extends CurrentTimestampLike { + override def prettyName: String = "now" } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 3723680..82b1e5f 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1619,7 +1619,11 @@ case class StringSpace(child: Expression) */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.", + usage = """ + _FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`. 
+ + _FUNC_(str FROM pos[ FOR len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`. + """, examples = """ Examples: > SELECT _FUNC_('Spark SQL', 5); @@ -1628,6 +1632,12 @@ case class StringSpace(child: Expression) SQL > SELECT _FUNC_('Spark SQL', 5, 1); k + > SELECT _FUNC_('Spark SQL' FROM 5); + k SQL + > SELECT _FUNC_('Spark SQL' FROM -3); + SQL + > SELECT _FUNC_('Spark SQL' FROM 5 FOR 1); + k """, since = "1.5.0") // scalastyle:on line.size.limit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index c79bf3e..80d8582 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -85,7 +85,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { LocalDate.now(DateTimeUtils.getZoneId(timeZoneId)), DateType) }) - case CurrentTimestamp() => currentTime + case CurrentTimestamp() | Now() => currentTime } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 9a6fe46..e18514c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -120,6 +120,9 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { val ignoreSet = Set( // One of examples shows getting the current timestamp "org.apache.spark.sql.catalyst.expressions.UnixTimestamp", + "org.apache.spark.sql.catalyst.expressions.CurrentDate", + "org.apache.spark.sql.catalyst.expressions.CurrentTimestamp", + "org.apache.spark.sql.catalyst.expressions.Now", + //
Random output without a seed "org.apache.spark.sql.catalyst.expressions.Rand", "org.apache.spark.sql.catalyst.expressions.Randn", diff --git a/sql/gen-sql-functions-docs.py b/sql/gen-sql-functions-docs.py index 7f0b2ae..c07734e 100644 --- a/sql/gen-sql-functions-docs.py +++ b/sql/gen-sql-functions-docs.py @@ -104,9 +104,11 @@ def _make_pretty_usage(infos): result.append(" <tbody>") for info in infos: - # Extracts (signature, description) pairs from `info.usage`, e.g., - # the signature is `func(expr)` and the description is `...` in an usage `func(expr) - ...`. - usages = iter(re.split(r"(%s\(.*\)) - " % info.name, info.usage.strip())[1:]) + # Extracts (signature, description) pairs from `info.usage`. + # Expected formats are as follows; + # - `_FUNC_(...) - description`, or + # - `_FUNC_ - description` + usages = iter(re.split(r"(%s.*) - " % info.name, info.usage.strip())[1:]) for (sig, description) in zip(usages, usages): result.append(" <tr>") result.append(" <td>%s</td>" % sig) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org