dtenedor commented on code in PR #48854:
URL: https://github.com/apache/spark/pull/48854#discussion_r1852876346
##########
sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out:
##########
@@ -495,6 +497,187 @@ org.apache.spark.sql.AnalysisException
}
+-- !query
+table t
+|> extend 1 as z
+-- !query schema
+struct<x:int,y:string,z:int>
+-- !query output
+0 abc 1
+1 def 1
+
+
+-- !query
+table t
+|> extend 1
+-- !query schema
+struct<x:int,y:string,pipeexpression(1):int>
+-- !query output
+0 abc 1
+1 def 1
+
+
+-- !query
+table t
+|> extend x as z
+-- !query schema
+struct<x:int,y:string,z:int>
+-- !query output
+0 abc 0
+1 def 1
+
+
+-- !query
+table t
+|> extend x + length(y) as z
+-- !query schema
+struct<x:int,y:string,z:int>
+-- !query output
+0 abc 3
+1 def 4
+
+
+-- !query
+table t
+|> extend x + length(y) as z, x + 1 as zz
+-- !query schema
+struct<x:int,y:string,z:int,zz:int>
+-- !query output
+0 abc 3 1
+1 def 4 2
+
+
+-- !query
+table t
+|> extend x + length(y) as z
+|> extend z + 1 as zz
+-- !query schema
+struct<x:int,y:string,z:int,zz:int>
+-- !query output
+0 abc 3 4
+1 def 4 5
+
+
+-- !query
+select col from st
+|> extend col.i1 as z
+-- !query schema
+struct<col:struct<i1:int,i2:int>,z:int>
+-- !query output
+{"i1":2,"i2":3} 2
+
+
+-- !query
+table t
+|> extend (select a from other where x = a limit 1) as z
+-- !query schema
+struct<x:int,y:string,z:int>
+-- !query output
+0 abc NULL
+1 def 1
+
+
+-- !query
+table t
+|> where exists (
+ table other
+ |> extend t.x
+ |> select * except (a, b))
+-- !query schema
+struct<x:int,y:string>
+-- !query output
+0 abc
+1 def
+
+
+-- !query
+table t
+|> extend 1 as x
+-- !query schema
+struct<x:int,y:string,x:int>
+-- !query output
+0 abc 1
+1 def 1
+
+
+-- !query
+table t
+|> extend first_value(x) over (partition by y) as result
+-- !query schema
+struct<x:int,y:string,result:int>
+-- !query output
+0 abc 0
+1 def 1
+
+
+-- !query
+table t
+|> extend x + length(y) as z, z + 1 as plus_one
+-- !query schema
+struct<x:int,y:string,z:int,plus_one:int>
+-- !query output
+0 abc 3 4
+1 def 4 5
+
+
+-- !query
+table t
+|> extend sum(x) as z
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "PIPE_OPERATOR_CONTAINS_AGGREGATE_FUNCTION",
+ "sqlState" : "0A000",
+ "messageParameters" : {
+ "clause" : "EXTEND",
+ "expr" : "sum(x#x)"
+ },
+ "queryContext" : [ {
+ "objectType" : "",
+ "objectName" : "",
+ "startIndex" : 19,
+ "stopIndex" : 24,
+ "fragment" : "sum(x)"
+ } ]
+}
+
+
+-- !query
+table t
+|> extend distinct x as z
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+ "errorClass" : "PARSE_SYNTAX_ERROR",
+ "sqlState" : "42601",
+ "messageParameters" : {
+ "error" : "'as'",
+ "hint" : ""
+ }
+}
+
+
+-- !query
+table t
+|> extend *
Review Comment:
👍 yes we do!
https://github.com/apache/spark/blob/2e1c3dc8004b4f003cde8dfae6857f5bef4bb170/sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out#L404C1-L404C12
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/pipeOperators.scala:
##########
@@ -18,26 +18,45 @@
 package org.apache.spark.sql.catalyst.expressions
 import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction
-import org.apache.spark.sql.catalyst.trees.TreePattern.{PIPE_OPERATOR_SELECT, RUNTIME_REPLACEABLE, TreePattern}
 import org.apache.spark.sql.errors.QueryCompilationErrors
 /**
- * Represents a SELECT clause when used with the |> SQL pipe operator.
- * We use this to make sure that no aggregate functions exist in the SELECT expressions.
+ * Represents an expression when used with a SQL pipe operator.
+ * We use this to check invariants about whether aggregate functions may exist in these expressions.
+ * @param child The child expression.
+ * @param isAggregate Whether the pipe operator is |> AGGREGATE.
+ *                    If true, the child expression must contain at least one aggregate function.
+ *                    If false, the child expression must not contain any aggregate functions.
+ * @param clause The clause of the pipe operator. This is used to generate error messages.
 */
-case class PipeSelect(child: Expression)
+case class PipeExpression(child: Expression, isAggregate: Boolean, clause: String)
   extends UnaryExpression with RuntimeReplaceable {
-  final override val nodePatterns: Seq[TreePattern] = Seq(PIPE_OPERATOR_SELECT, RUNTIME_REPLACEABLE)
-  override def withNewChildInternal(newChild: Expression): Expression = PipeSelect(newChild)
-  override lazy val replacement: Expression = {
+  override def withNewChildInternal(newChild: Expression): Expression =
+    PipeExpression(newChild, isAggregate, clause)
+  override lazy val replacement: Expression = if (isAggregate) {
+    // Make sure the child expression contains at least one aggregate function.
+    var foundAggregate = false
Review Comment:
Thanks, this is simpler. (I had to keep the recursive method in order to
stop traversing through window expressions, but this logic is better.)
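
For readers following along, here is a minimal Scala sketch (not the PR's actual code; the object and method names are my own illustration) of the kind of recursive scan being discussed: it reports whether an expression tree contains an aggregate function, but deliberately stops at window expressions so that aggregates used as window functions (e.g. `first_value(x) over (...)`) are not flagged.

```scala
import org.apache.spark.sql.catalyst.expressions.{Expression, WindowExpression}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction

// Hypothetical helper, for illustration only.
object AggregateScanSketch {
  // Returns true if `e` contains an aggregate function that is NOT inside a
  // window expression. Traversal stops at WindowExpression nodes, since
  // window-function usage is allowed in pipe SELECT/EXTEND operators.
  def containsAggregateOutsideWindows(e: Expression): Boolean = e match {
    case _: WindowExpression => false
    case _: AggregateFunction => true
    case other => other.children.exists(containsAggregateOutsideWindows)
  }
}
```

This mirrors the design point in the comment above: a plain bottom-up traversal would descend into window expressions too, so the recursion has to short-circuit at `WindowExpression` to enforce the aggregate-function invariant correctly.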