dtenedor commented on code in PR #48854:
URL: https://github.com/apache/spark/pull/48854#discussion_r1852876346
##########
sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out:
##########
@@ -495,6 +497,187 @@ org.apache.spark.sql.AnalysisException
}
+-- !query
+table t
+|> extend 1 as z
+-- !query schema
+struct<x:int,y:string,z:int>
+-- !query output
+0 abc 1
+1 def 1
+
+
+-- !query
+table t
+|> extend 1
+-- !query schema
+struct<x:int,y:string,pipeexpression(1):int>
+-- !query output
+0 abc 1
+1 def 1
+
+
+-- !query
+table t
+|> extend x as z
+-- !query schema
+struct<x:int,y:string,z:int>
+-- !query output
+0 abc 0
+1 def 1
+
+
+-- !query
+table t
+|> extend x + length(y) as z
+-- !query schema
+struct<x:int,y:string,z:int>
+-- !query output
+0 abc 3
+1 def 4
+
+
+-- !query
+table t
+|> extend x + length(y) as z, x + 1 as zz
+-- !query schema
+struct<x:int,y:string,z:int,zz:int>
+-- !query output
+0 abc 3 1
+1 def 4 2
+
+
+-- !query
+table t
+|> extend x + length(y) as z
+|> extend z + 1 as zz
+-- !query schema
+struct<x:int,y:string,z:int,zz:int>
+-- !query output
+0 abc 3 4
+1 def 4 5
+
+
+-- !query
+select col from st
+|> extend col.i1 as z
+-- !query schema
+struct<col:struct<i1:int,i2:int>,z:int>
+-- !query output
+{"i1":2,"i2":3} 2
+
+
+-- !query
+table t
+|> extend (select a from other where x = a limit 1) as z
+-- !query schema
+struct<x:int,y:string,z:int>
+-- !query output
+0 abc NULL
+1 def 1
+
+
+-- !query
+table t
+|> where exists (
+ table other
+ |> extend t.x
+ |> select * except (a, b))
+-- !query schema
+struct<x:int,y:string>
+-- !query output
+0 abc
+1 def
+
+
+-- !query
+table t
+|> extend 1 as x
+-- !query schema
+struct<x:int,y:string,x:int>
+-- !query output
+0 abc 1
+1 def 1
+
+
+-- !query
+table t
+|> extend first_value(x) over (partition by y) as result
+-- !query schema
+struct<x:int,y:string,result:int>
+-- !query output
+0 abc 0
+1 def 1
+
+
+-- !query
+table t
+|> extend x + length(y) as z, z + 1 as plus_one
+-- !query schema
+struct<x:int,y:string,z:int,plus_one:int>
+-- !query output
+0 abc 3 4
+1 def 4 5
+
+
+-- !query
+table t
+|> extend sum(x) as z
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION",
+ "sqlState" : "0A000",
+ "messageParameters" : {
+ "clause" : "EXTEND",
+ "expr" : "sum(x#x)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 19,
+ "stopIndex" : 24,
+ "fragment" : "sum(x)"
+ } ]
+}
+
+
+-- !query
+table t
+|> extend distinct x as z
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'as'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+table t
+|> extend *
Review Comment:
👍 yes we do!
https://github.com/apache/spark/blob/2e1c3dc8004b4f003cde8dfae6857f5bef4bb170/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out#L404C1-L404C12
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/pipeOperators.scala:
##########
@@ -18,26 +18,45 @@
 package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction
-import org.apache.spark.sql.catalyst.trees.TreePattern.{PIPE_OPERATOR_SELECT, RUNTIME_REPLACEABLE, TreePattern}
 import org.apache.spark.sql.errors.QueryCompilationErrors
 /**
- * Represents a SELECT clause when used with the |> SQL pipe operator.
- * We use this to make sure that no aggregate functions exist in the SELECT expressions.
+ * Represents an expression when used with a SQL pipe operator.
+ * We use this to check invariants about whether aggregate functions may exist in these expressions.
+ * @param child The child expression.
+ * @param isAggregate Whether the pipe operator is |> AGGREGATE.
+ *                    If true, the child expression must contain at least one aggregate function.
+ *                    If false, the child expression must not contain any aggregate functions.
+ * @param clause The clause of the pipe operator. This is used to generate error messages.
 */
-case class PipeSelect(child: Expression)
+case class PipeExpression(child: Expression, isAggregate: Boolean, clause: String)
   extends UnaryExpression with RuntimeReplaceable {
-  final override val nodePatterns: Seq[TreePattern] = Seq(PIPE_OPERATOR_SELECT, RUNTIME_REPLACEABLE)
-  override def withNewChildInternal(newChild: Expression): Expression = PipeSelect(newChild)
-  override lazy val replacement: Expression = {
+  override def withNewChildInternal(newChild: Expression): Expression =
+    PipeExpression(newChild, isAggregate, clause)
+  override lazy val replacement: Expression = if (isAggregate) {
+    // Make sure the child expression contains at least one aggregate function.
+    var foundAggregate = false
Review Comment:
Thanks, this is simpler. (I had to keep the recursive method in order to
stop traversing through window expressions, but this logic is better.)
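
For readers following along, here is a minimal Scala sketch (not the PR's actual code; the object and method names are my own illustration) of the kind of recursive scan being discussed: it reports whether an expression tree contains an aggregate function, but deliberately stops at window expressions so that aggregates used as window functions (e.g. `first_value(x) over (...)`) are not flagged.

```scala
import org.apache.spark.sql.catalyst.expressions.{Expression, WindowExpression}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction

// Hypothetical helper, for illustration only.
object AggregateScanSketch {
  // Returns true if `e` contains an aggregate function that is NOT inside a
  // window expression. Traversal stops at WindowExpression nodes, since
  // window-function usage is allowed in pipe SELECT/EXTEND operators.
  def containsAggregateOutsideWindows(e: Expression): Boolean = e match {
    case _: WindowExpression => false
    case _: AggregateFunction => true
    case other => other.children.exists(containsAggregateOutsideWindows)
  }
}
```

This mirrors the design point in the comment above: a plain bottom-up traversal would descend into window expressions too, so the recursion has to short-circuit at `WindowExpression` to enforce the aggregate-function invariant correctly.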