[spark] branch branch-3.0 updated: [SPARK-31562][SQL] Update ExpressionDescription for substring, current_date, and current_timestamp

dongjoon Sun, 26 Apr 2020 11:51:39 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 3d8a62d  [SPARK-31562][SQL] Update ExpressionDescription for 
substring, current_date, and current_timestamp
3d8a62d is described below

commit 3d8a62ded8a7058f9a292e402e8e6f688d1883c7
Author: Takeshi Yamamuro <[email protected]>
AuthorDate: Sun Apr 26 11:46:52 2020 -0700

    [SPARK-31562][SQL] Update ExpressionDescription for substring, 
current_date, and current_timestamp
    
    ### What changes were proposed in this pull request?
    
    This PR adds entries for substring, current_date, and 
current_timestamp to the SQL built-in function documentation. Specifically, the 
entries are as follows:
    
     - SELECT current_date;
     - SELECT current_timestamp;
     - SELECT substring('abcd' FROM 1);
     - SELECT substring('abcd' FROM 1 FOR 2);
    
    ### Why are the changes needed?
    
    To make the SQL (built-in functions) references complete.
    
    ### Does this PR introduce any user-facing change?
    
    <img width="1040" alt="Screen Shot 2020-04-25 at 16 51 07" 
src="https://user-images.githubusercontent.com/692303/80274851-6ca5ee00-8718-11ea-9a35-9ae82008cb4b.png">
    
    <img width="974" alt="Screen Shot 2020-04-25 at 17 24 24" 
src="https://user-images.githubusercontent.com/692303/80275032-a88d8300-8719-11ea-92ec-95b80169ae28.png">
    
    <img width="862" alt="Screen Shot 2020-04-25 at 17 27 48" 
src="https://user-images.githubusercontent.com/692303/80275114-36696e00-871a-11ea-8e39-02e93eabb92f.png">
    
    ### How was this patch tested?
    
    Added test examples.
    
    Closes #28342 from maropu/SPARK-31562.
    
    Authored-by: Takeshi Yamamuro <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit e01125db0d351001dc971e33f4b15403f24d97b4)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../sql/catalyst/analysis/FunctionRegistry.scala   |  2 +-
 .../analysis/UnsupportedOperationChecker.scala     |  6 +--
 .../catalyst/expressions/datetimeExpressions.scala | 63 +++++++++++++++++-----
 .../catalyst/expressions/stringExpressions.scala   | 12 ++++-
 .../sql/catalyst/optimizer/finishAnalysis.scala    |  2 +-
 .../sql/expressions/ExpressionInfoSuite.scala      |  3 ++
 sql/gen-sql-functions-docs.py                      |  8 +--
 7 files changed, 74 insertions(+), 22 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 6cd492f..d3b0731 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -404,7 +404,7 @@ object FunctionRegistry {
     expression[Month]("month"),
     expression[MonthsBetween]("months_between"),
     expression[NextDay]("next_day"),
-    expression[CurrentTimestamp]("now", true),
+    expression[Now]("now"),
     expression[Quarter]("quarter"),
     expression[Second]("second"),
     expression[ParseToTimestamp]("to_timestamp"),
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
index 2f8cb26..423f89f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala
@@ -19,13 +19,11 @@ package org.apache.spark.sql.catalyst.analysis
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.expressions.{Attribute, 
AttributeReference, AttributeSet, CurrentDate, CurrentTimestamp, 
MonotonicallyIncreasingID}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, CurrentDate, 
CurrentTimestamp, MonotonicallyIncreasingID, Now}
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
-import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
 import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.streaming.InternalOutputModes
-import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.streaming.OutputMode
 
 /**
@@ -412,7 +410,7 @@ object UnsupportedOperationChecker extends Logging {
 
       subPlan.expressions.foreach { e =>
         if (e.collectLeaves().exists {
-          case (_: CurrentTimestamp | _: CurrentDate) => true
+          case (_: CurrentTimestamp | _: Now | _: CurrentDate) => true
           case _ => false
         }) {
           throwError(s"Continuous processing does not support current time 
operations.")
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
index f828bd4..8f57421 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala
@@ -29,7 +29,6 @@ import org.apache.commons.text.StringEscapeUtils
 import org.apache.spark.SparkUpgradeException
 import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
 import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, 
TimestampFormatter}
@@ -63,7 +62,21 @@ trait TimeZoneAwareExpression extends Expression {
  * There is no code generation since this expression should get constant 
folded by the optimizer.
  */
 @ExpressionDescription(
-  usage = "_FUNC_() - Returns the current date at the start of query 
evaluation.",
+  usage = """
+    _FUNC_() - Returns the current date at the start of query evaluation.
+
+    _FUNC_ - Returns the current date at the start of query evaluation.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+       2020-04-25
+      > SELECT _FUNC_;
+       2020-04-25
+  """,
+  note = """
+    The syntax without braces has been supported since 2.0.1.
+  """,
   group = "datetime_funcs",
   since = "1.5.0")
 case class CurrentDate(timeZoneId: Option[String] = None)
@@ -84,6 +97,13 @@ case class CurrentDate(timeZoneId: Option[String] = None)
   override def prettyName: String = "current_date"
 }
 
+abstract class CurrentTimestampLike() extends LeafExpression with 
CodegenFallback {
+  override def foldable: Boolean = true
+  override def nullable: Boolean = false
+  override def dataType: DataType = TimestampType
+  override def eval(input: InternalRow): Any = currentTimestamp()
+}
+
 /**
  * Returns the current timestamp at the start of query evaluation.
  * All calls of current_timestamp within the same query return the same value.
@@ -91,19 +111,38 @@ case class CurrentDate(timeZoneId: Option[String] = None)
  * There is no code generation since this expression should get constant 
folded by the optimizer.
  */
 @ExpressionDescription(
-  usage = "_FUNC_() - Returns the current timestamp at the start of query 
evaluation.",
+  usage = """
+    _FUNC_() - Returns the current timestamp at the start of query evaluation.
+
+    _FUNC_ - Returns the current timestamp at the start of query evaluation.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+       2020-04-25 15:49:11.914
+      > SELECT _FUNC_;
+       2020-04-25 15:49:11.914
+  """,
+  note = """
+    The syntax without braces has been supported since 2.0.1.
+  """,
   group = "datetime_funcs",
   since = "1.5.0")
-case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
-  override def foldable: Boolean = true
-  override def nullable: Boolean = false
-
-  override def dataType: DataType = TimestampType
-
-  override def eval(input: InternalRow): Any = currentTimestamp()
+case class CurrentTimestamp() extends CurrentTimestampLike {
+  override def prettyName: String = "current_timestamp"
+}
 
-  override def prettyName: String =
-    getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_timestamp")
+@ExpressionDescription(
+  usage = "_FUNC_() - Returns the current timestamp at the start of query 
evaluation.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_();
+       2020-04-25 15:49:11.914
+  """,
+  group = "datetime_funcs",
+  since = "1.6.0")
+case class Now() extends CurrentTimestampLike {
+  override def prettyName: String = "now"
 }
 
 /**
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 3723680..82b1e5f 100755
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -1619,7 +1619,11 @@ case class StringSpace(child: Expression)
  */
 // scalastyle:off line.size.limit
 @ExpressionDescription(
-  usage = "_FUNC_(str, pos[, len]) - Returns the substring of `str` that 
starts at `pos` and is of length `len`, or the slice of byte array that starts 
at `pos` and is of length `len`.",
+  usage = """
+    _FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at 
`pos` and is of length `len`, or the slice of byte array that starts at `pos` 
and is of length `len`.
+
+    _FUNC_(str FROM pos[ FOR len]]) - Returns the substring of `str` that 
starts at `pos` and is of length `len`, or the slice of byte array that starts 
at `pos` and is of length `len`.
+  """,
   examples = """
     Examples:
       > SELECT _FUNC_('Spark SQL', 5);
@@ -1628,6 +1632,12 @@ case class StringSpace(child: Expression)
        SQL
       > SELECT _FUNC_('Spark SQL', 5, 1);
        k
+      > SELECT _FUNC_('Spark SQL' FROM 5);
+       k SQL
+      > SELECT _FUNC_('Spark SQL' FROM -3);
+       SQL
+      > SELECT _FUNC_('Spark SQL' FROM 5 FOR 1);
+       k
   """,
   since = "1.5.0")
 // scalastyle:on line.size.limit
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
index c79bf3e..80d8582 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala
@@ -85,7 +85,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] {
             LocalDate.now(DateTimeUtils.getZoneId(timeZoneId)),
             DateType)
         })
-      case CurrentTimestamp() => currentTime
+      case CurrentTimestamp() | Now() => currentTime
     }
   }
 }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
index 9a6fe46..e18514c 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala
@@ -120,6 +120,9 @@ class ExpressionInfoSuite extends SparkFunSuite with 
SharedSparkSession {
     val ignoreSet = Set(
       // One of examples shows getting the current timestamp
       "org.apache.spark.sql.catalyst.expressions.UnixTimestamp",
+      "org.apache.spark.sql.catalyst.expressions.CurrentDate",
+      "org.apache.spark.sql.catalyst.expressions.CurrentTimestamp",
+      "org.apache.spark.sql.catalyst.expressions.Now",
       // Random output without a seed
       "org.apache.spark.sql.catalyst.expressions.Rand",
       "org.apache.spark.sql.catalyst.expressions.Randn",
diff --git a/sql/gen-sql-functions-docs.py b/sql/gen-sql-functions-docs.py
index 7f0b2ae..c07734e 100644
--- a/sql/gen-sql-functions-docs.py
+++ b/sql/gen-sql-functions-docs.py
@@ -104,9 +104,11 @@ def _make_pretty_usage(infos):
     result.append("  <tbody>")
 
     for info in infos:
-        # Extracts (signature, description) pairs from `info.usage`, e.g.,
-        # the signature is `func(expr)` and the description is `...` in an 
usage `func(expr) - ...`.
-        usages = iter(re.split(r"(%s\(.*\)) - " % info.name, 
info.usage.strip())[1:])
+        # Extracts (signature, description) pairs from `info.usage`.
+        # Expected formats are as follows;
+        #  - `_FUNC_(...) - description`, or
+        #  - `_FUNC_ - description`
+        usages = iter(re.split(r"(%s.*) - " % info.name, 
info.usage.strip())[1:])
         for (sig, description) in zip(usages, usages):
             result.append("    <tr>")
             result.append("      <td>%s</td>" % sig)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch branch-3.0 updated: [SPARK-31562][SQL] Update ExpressionDescription for substring, current_date, and current_timestamp

Reply via email to