This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 3d8a62d [SPARK-31562][SQL] Update ExpressionDescription for
substring, current_date, and current_timestamp
3d8a62d is described below
commit 3d8a62ded8a7058f9a292e402e8e6f688d1883c7
Author: Takeshi Yamamuro <[email protected]>
AuthorDate: Sun Apr 26 11:46:52 2020 -0700
[SPARK-31562][SQL] Update ExpressionDescription for substring,
current_date, and current_timestamp
### What changes were proposed in this pull request?
This PR adds entries for substring, current_date, and
current_timestamp to the SQL built-in function documentation. Specifically, the
entries are as follows:
- SELECT current_date;
- SELECT current_timestamp;
- SELECT substring('abcd' FROM 1);
- SELECT substring('abcd' FROM 1 FOR 2);
### Why are the changes needed?
To make the SQL built-in function reference complete.
### Does this PR introduce any user-facing change?
<img width="1040" alt="Screen Shot 2020-04-25 at 16 51 07"
src="https://user-images.githubusercontent.com/692303/80274851-6ca5ee00-8718-11ea-9a35-9ae82008cb4b.png">
<img width="974" alt="Screen Shot 2020-04-25 at 17 24 24"
src="https://user-images.githubusercontent.com/692303/80275032-a88d8300-8719-11ea-92ec-95b80169ae28.png">
<img width="862" alt="Screen Shot 2020-04-25 at 17 27 48"
src="https://user-images.githubusercontent.com/692303/80275114-36696e00-871a-11ea-8e39-02e93eabb92f.png">
### How was this patch tested?
Added test examples.
Closes #28342 from maropu/SPARK-31562.
Authored-by: Takeshi Yamamuro <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit e01125db0d351001dc971e33f4b15403f24d97b4)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../sql/catalyst/analysis/FunctionRegistry.scala | 2 +-
.../analysis/UnsupportedOperationChecker.scala | 6 +--
.../catalyst/expressions/datetimeExpressions.scala | 63 +++++++++++++++++-----
.../catalyst/expressions/stringExpressions.scala | 12 ++++-
.../sql/catalyst/optimizer/finishAnalysis.scala | 2 +-
.../sql/expressions/ExpressionInfoSuite.scala | 3 ++
sql/gen-sql-functions-docs.py | 8 +--
7 files changed, 74 insertions(+), 22 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 6cd492f..d3b0731 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -404,7 +404,7 @@ object FunctionRegistry {
expression[Month]("month"),
expression[MonthsBetween]("months_between"),
expression[NextDay]("next_day"),
- expression[CurrentTimestamp]("now", true),
+ expression[Now]("now"),
expression[Quarter]("quarter"),
expression[Second]("second"),
expression[ParseToTimestamp]("to_timestamp"),
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
index 2f8cb26..423f89f 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
@@ -19,13 +19,11 @@ package org.apache.spark.sql.catalyst.analysis
import org.apache.spark.internal.Logging
import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.{Attribute,
AttributeReference, AttributeSet, CurrentDate, CurrentTimestamp,
MonotonicallyIncreasingID}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, CurrentDate,
CurrentTimestamp, MonotonicallyIncreasingID, Now}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
-import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.streaming.InternalOutputModes
-import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.streaming.OutputMode
/**
@@ -412,7 +410,7 @@ object UnsupportedOperationChecker extends Logging {
subPlan.expressions.foreach { e =>
if (e.collectLeaves().exists {
- case (_: CurrentTimestamp | _: CurrentDate) => true
+ case (_: CurrentTimestamp | _: Now | _: CurrentDate) => true
case _ => false
}) {
throwError(s"Continuous processing does not support current time
operations.")
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index f828bd4..8f57421 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -29,7 +29,6 @@ import org.apache.commons.text.StringEscapeUtils
import org.apache.spark.SparkUpgradeException
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats,
TimestampFormatter}
@@ -63,7 +62,21 @@ trait TimeZoneAwareExpression extends Expression {
* There is no code generation since this expression should get constant
folded by the optimizer.
*/
@ExpressionDescription(
- usage = "_FUNC_() - Returns the current date at the start of query
evaluation.",
+ usage = """
+ _FUNC_() - Returns the current date at the start of query evaluation.
+
+ _FUNC_ - Returns the current date at the start of query evaluation.
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_();
+ 2020-04-25
+ > SELECT _FUNC_;
+ 2020-04-25
+ """,
+ note = """
+ The syntax without braces has been supported since 2.0.1.
+ """,
group = "datetime_funcs",
since = "1.5.0")
case class CurrentDate(timeZoneId: Option[String] = None)
@@ -84,6 +97,13 @@ case class CurrentDate(timeZoneId: Option[String] = None)
override def prettyName: String = "current_date"
}
+abstract class CurrentTimestampLike() extends LeafExpression with
CodegenFallback {
+ override def foldable: Boolean = true
+ override def nullable: Boolean = false
+ override def dataType: DataType = TimestampType
+ override def eval(input: InternalRow): Any = currentTimestamp()
+}
+
/**
* Returns the current timestamp at the start of query evaluation.
* All calls of current_timestamp within the same query return the same value.
@@ -91,19 +111,38 @@ case class CurrentDate(timeZoneId: Option[String] = None)
* There is no code generation since this expression should get constant
folded by the optimizer.
*/
@ExpressionDescription(
- usage = "_FUNC_() - Returns the current timestamp at the start of query
evaluation.",
+ usage = """
+ _FUNC_() - Returns the current timestamp at the start of query evaluation.
+
+ _FUNC_ - Returns the current timestamp at the start of query evaluation.
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_();
+ 2020-04-25 15:49:11.914
+ > SELECT _FUNC_;
+ 2020-04-25 15:49:11.914
+ """,
+ note = """
+ The syntax without braces has been supported since 2.0.1.
+ """,
group = "datetime_funcs",
since = "1.5.0")
-case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
- override def foldable: Boolean = true
- override def nullable: Boolean = false
-
- override def dataType: DataType = TimestampType
-
- override def eval(input: InternalRow): Any = currentTimestamp()
+case class CurrentTimestamp() extends CurrentTimestampLike {
+ override def prettyName: String = "current_timestamp"
+}
- override def prettyName: String =
- getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_timestamp")
+@ExpressionDescription(
+ usage = "_FUNC_() - Returns the current timestamp at the start of query
evaluation.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_();
+ 2020-04-25 15:49:11.914
+ """,
+ group = "datetime_funcs",
+ since = "1.6.0")
+case class Now() extends CurrentTimestampLike {
+ override def prettyName: String = "now"
}
/**
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 3723680..82b1e5f 100755
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -1619,7 +1619,11 @@ case class StringSpace(child: Expression)
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
- usage = "_FUNC_(str, pos[, len]) - Returns the substring of `str` that
starts at `pos` and is of length `len`, or the slice of byte array that starts
at `pos` and is of length `len`.",
+ usage = """
+ _FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at
`pos` and is of length `len`, or the slice of byte array that starts at `pos`
and is of length `len`.
+
+ _FUNC_(str FROM pos[ FOR len]]) - Returns the substring of `str` that
starts at `pos` and is of length `len`, or the slice of byte array that starts
at `pos` and is of length `len`.
+ """,
examples = """
Examples:
> SELECT _FUNC_('Spark SQL', 5);
@@ -1628,6 +1632,12 @@ case class StringSpace(child: Expression)
SQL
> SELECT _FUNC_('Spark SQL', 5, 1);
k
+ > SELECT _FUNC_('Spark SQL' FROM 5);
+ k SQL
+ > SELECT _FUNC_('Spark SQL' FROM -3);
+ SQL
+ > SELECT _FUNC_('Spark SQL' FROM 5 FOR 1);
+ k
""",
since = "1.5.0")
// scalastyle:on line.size.limit
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
index c79bf3e..80d8582 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
@@ -85,7 +85,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] {
LocalDate.now(DateTimeUtils.getZoneId(timeZoneId)),
DateType)
})
- case CurrentTimestamp() => currentTime
+ case CurrentTimestamp() | Now() => currentTime
}
}
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
index 9a6fe46..e18514c 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
@@ -120,6 +120,9 @@ class ExpressionInfoSuite extends SparkFunSuite with
SharedSparkSession {
val ignoreSet = Set(
// One of examples shows getting the current timestamp
"org.apache.spark.sql.catalyst.expressions.UnixTimestamp",
+ "org.apache.spark.sql.catalyst.expressions.CurrentDate",
+ "org.apache.spark.sql.catalyst.expressions.CurrentTimestamp",
+ "org.apache.spark.sql.catalyst.expressions.Now",
// Random output without a seed
"org.apache.spark.sql.catalyst.expressions.Rand",
"org.apache.spark.sql.catalyst.expressions.Randn",
diff --git a/sql/gen-sql-functions-docs.py b/sql/gen-sql-functions-docs.py
index 7f0b2ae..c07734e 100644
--- a/sql/gen-sql-functions-docs.py
+++ b/sql/gen-sql-functions-docs.py
@@ -104,9 +104,11 @@ def _make_pretty_usage(infos):
result.append(" <tbody>")
for info in infos:
- # Extracts (signature, description) pairs from `info.usage`, e.g.,
- # the signature is `func(expr)` and the description is `...` in an
usage `func(expr) - ...`.
- usages = iter(re.split(r"(%s\(.*\)) - " % info.name,
info.usage.strip())[1:])
+ # Extracts (signature, description) pairs from `info.usage`.
+ # Expected formats are as follows;
+ # - `_FUNC_(...) - description`, or
+ # - `_FUNC_ - description`
+ usages = iter(re.split(r"(%s.*) - " % info.name,
info.usage.strip())[1:])
for (sig, description) in zip(usages, usages):
result.append(" <tr>")
result.append(" <td>%s</td>" % sig)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]