AngersZhuuuu commented on a change in pull request #29087:
URL: https://github.com/apache/spark/pull/29087#discussion_r551973037
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala
##########
@@ -301,6 +302,185 @@ class SparkSqlParserSuite extends AnalysisTest {
List.empty, List.empty, None, None, false)))
}
+ test("SPARK-28227: script transform with row format delimit with
aggregation") {
+ assertEqual(
+ """
+ |SELECT TRANSFORM(a, sum(b), max(c))
+ | ROW FORMAT DELIMITED
+ | FIELDS TERMINATED BY ','
+ | COLLECTION ITEMS TERMINATED BY '#'
+ | MAP KEYS TERMINATED BY '@'
+ | LINES TERMINATED BY '\n'
+ | NULL DEFINED AS 'null'
+ | USING 'cat' AS (a, b, c)
+ | ROW FORMAT DELIMITED
+ | FIELDS TERMINATED BY ','
+ | COLLECTION ITEMS TERMINATED BY '#'
+ | MAP KEYS TERMINATED BY '@'
+ | LINES TERMINATED BY '\n'
+ | NULL DEFINED AS 'NULL'
+ |FROM testData
+ |GROUP BY a
+ |HAVING sum(b) > 10
+ """.stripMargin,
+ ScriptTransformation(
+ Seq(UnresolvedStar(None)),
+ "cat",
+ Seq(AttributeReference("a", StringType)(),
+ AttributeReference("b", StringType)(),
+ AttributeReference("c", StringType)()),
+ UnresolvedHaving(
+ GreaterThan(
+ UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")),
isDistinct = false),
+ Literal(10)),
+ Aggregate(
+ Seq('a),
+ Seq(
+ 'a,
+ UnresolvedAlias(
+ UnresolvedFunction("sum", Seq(UnresolvedAttribute("b")),
isDistinct = false), None),
+ UnresolvedAlias(
+ UnresolvedFunction("max", Seq(UnresolvedAttribute("c")),
isDistinct = false), None)
+ ),
+ UnresolvedRelation(TableIdentifier("testData")))),
+ ScriptInputOutputSchema(
+ Seq(("TOK_TABLEROWFORMATFIELD", ","),
+ ("TOK_TABLEROWFORMATCOLLITEMS", "#"),
+ ("TOK_TABLEROWFORMATMAPKEYS", "@"),
+ ("TOK_TABLEROWFORMATNULL", "null"),
+ ("TOK_TABLEROWFORMATLINES", "\n")),
+ Seq(("TOK_TABLEROWFORMATFIELD", ","),
+ ("TOK_TABLEROWFORMATCOLLITEMS", "#"),
+ ("TOK_TABLEROWFORMATMAPKEYS", "@"),
+ ("TOK_TABLEROWFORMATNULL", "NULL"),
+ ("TOK_TABLEROWFORMATLINES", "\n")), None, None,
Review comment:
> I feel this part is redundant, so could you pull out this part as a
> shared variable?

How about the current change? It merges the two unit tests.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]