imback82 commented on a change in pull request #31273:
URL: https://github.com/apache/spark/pull/31273#discussion_r561604269
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
##########
@@ -2029,7 +2029,7 @@ class DataSourceV2SQLSuite
test("CREATE VIEW") {
val v = "testcat.ns1.ns2.v"
val e = intercept[AnalysisException] {
- sql(s"CREATE VIEW $v AS SELECT * FROM tab1")
+ sql(s"CREATE VIEW $v AS SELECT 1")
Review comment:
This needs to be updated now that `ResolveSessionCatalog` handles
`CreateViewStatement` only if its `child` is resolved.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
##########
@@ -558,20 +560,27 @@ object ViewHelper {
catalog: SessionCatalog, child: LogicalPlan): (Seq[Seq[String]],
Seq[String]) = {
def collectTempViews(child: LogicalPlan): Seq[Seq[String]] = {
child.collect {
- case UnresolvedRelation(nameParts, _, _) if
catalog.isTempView(nameParts) =>
- Seq(nameParts)
- case plan if !plan.resolved => plan.expressions.flatMap(_.collect {
+ case s @ SubqueryAlias(_, view: View) if view.isTempView =>
+ Seq(s.identifier.qualifier :+ s.identifier.name)
+ case s: SubqueryAlias if s.getTagValue(SUBQUERY_TYPE_TAG).exists(_ ==
"tempView") =>
Review comment:
This seems a bit hacky, but matching on `SubqueryAlias(_, view: View)` alone
doesn't handle all the cases. For example,
```scala
spark.range(10).createTempView("t")
sql("CREATE VIEW v AS SELECT * FROM t")
```
The `child` is:
```
Project [id#16L]
+- SubqueryAlias t
+- Range (0, 10, step=1, splits=Some(2))
```
@cloud-fan, @viirya Any suggestions on how to capture the view properly? Would
it make sense to add an `isView` field to `SubqueryAlias`?
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
##########
@@ -1086,7 +1086,7 @@ class Analyzer(override val catalogManager:
CatalogManager)
}
view.copy(child = newChild)
case p @ SubqueryAlias(_, view: View) =>
- p.copy(child = resolveViews(view))
+ p.makeCopy(Array(p.identifier, resolveViews(view)))
Review comment:
This is to copy the tags, but it seems hacky since `copy` can still be called
in other places, which would remove the tags. Please check
https://github.com/apache/spark/pull/31273#discussion_r561607183 if it makes
sense to add `isView` to `SubqueryAlias`.
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
##########
@@ -1086,7 +1086,7 @@ class Analyzer(override val catalogManager:
CatalogManager)
}
view.copy(child = newChild)
case p @ SubqueryAlias(_, view: View) =>
- p.copy(child = resolveViews(view))
+ p.makeCopy(Array(p.identifier, resolveViews(view)))
Review comment:
This is to copy the tags, but it seems hacky since `copy` can be called in
other places, which will remove the tags. Please check
https://github.com/apache/spark/pull/31273#discussion_r561607183 if it makes
sense to add `isView` to `SubqueryAlias`.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
##########
@@ -558,20 +560,27 @@ object ViewHelper {
catalog: SessionCatalog, child: LogicalPlan): (Seq[Seq[String]],
Seq[String]) = {
def collectTempViews(child: LogicalPlan): Seq[Seq[String]] = {
child.collect {
- case UnresolvedRelation(nameParts, _, _) if
catalog.isTempView(nameParts) =>
- Seq(nameParts)
- case plan if !plan.resolved => plan.expressions.flatMap(_.collect {
+ case s @ SubqueryAlias(_, view: View) if view.isTempView =>
+ Seq(s.identifier.qualifier :+ s.identifier.name)
+ case s: SubqueryAlias if s.getTagValue(SUBQUERY_TYPE_TAG).exists(_ ==
"tempView") =>
Review comment:
Or is it safe to do: `case s: SubqueryAlias if
catalog.isTempView(s.identifier.name)`?
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
##########
@@ -558,20 +560,27 @@ object ViewHelper {
catalog: SessionCatalog, child: LogicalPlan): (Seq[Seq[String]],
Seq[String]) = {
def collectTempViews(child: LogicalPlan): Seq[Seq[String]] = {
child.collect {
- case UnresolvedRelation(nameParts, _, _) if
catalog.isTempView(nameParts) =>
- Seq(nameParts)
- case plan if !plan.resolved => plan.expressions.flatMap(_.collect {
+ case s @ SubqueryAlias(_, view: View) if view.isTempView =>
+ Seq(s.identifier.qualifier :+ s.identifier.name)
+ case s: SubqueryAlias if s.getTagValue(SUBQUERY_TYPE_TAG).exists(_ ==
"tempView") =>
Review comment:
Or is it safe to do: `case s: SubqueryAlias if
catalog.isTempView(s.identifier.name)`?
I see the comment "After replacement, it is impossible to detect whether the
SubqueryAlias is added/generated from a temporary view." If we can add a field
to `SubqueryAlias`, this should be easy to handle.
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
##########
@@ -1363,9 +1368,14 @@ class SessionCatalog(
Utils.classForName("org.apache.spark.sql.expressions.UserDefinedAggregateFunction")
if (clsForUDAF.isAssignableFrom(clazz)) {
val cls =
Utils.classForName("org.apache.spark.sql.execution.aggregate.ScalaUDAF")
- val e = cls.getConstructor(classOf[Seq[Expression]], clsForUDAF,
classOf[Int], classOf[Int])
- .newInstance(input,
- clazz.getConstructor().newInstance().asInstanceOf[Object],
Int.box(1), Int.box(1))
+ val e = cls.getConstructor(
Review comment:
Yes, we need to collect temp functions by name. Currently, the UDAF doesn't
store a name, so I added one.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
##########
@@ -558,20 +560,27 @@ object ViewHelper {
catalog: SessionCatalog, child: LogicalPlan): (Seq[Seq[String]],
Seq[String]) = {
def collectTempViews(child: LogicalPlan): Seq[Seq[String]] = {
child.collect {
- case UnresolvedRelation(nameParts, _, _) if
catalog.isTempView(nameParts) =>
- Seq(nameParts)
- case plan if !plan.resolved => plan.expressions.flatMap(_.collect {
+ case s @ SubqueryAlias(_, view: View) if view.isTempView =>
+ Seq(s.identifier.qualifier :+ s.identifier.name)
+ case s: SubqueryAlias if s.getTagValue(SUBQUERY_TYPE_TAG).exists(_ ==
"tempView") =>
Review comment:
~~Or is it safe to do: `case s: SubqueryAlias if
catalog.isTempView(s.identifier.name)`?~~
I see the comment "After replacement, it is impossible to detect whether the
SubqueryAlias is added/generated from a temporary view." If we can add a field
to `SubqueryAlias`, this should be easy to handle.
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala
##########
@@ -558,20 +560,27 @@ object ViewHelper {
catalog: SessionCatalog, child: LogicalPlan): (Seq[Seq[String]],
Seq[String]) = {
def collectTempViews(child: LogicalPlan): Seq[Seq[String]] = {
child.collect {
- case UnresolvedRelation(nameParts, _, _) if
catalog.isTempView(nameParts) =>
- Seq(nameParts)
- case plan if !plan.resolved => plan.expressions.flatMap(_.collect {
+ case s @ SubqueryAlias(_, view: View) if view.isTempView =>
+ Seq(s.identifier.qualifier :+ s.identifier.name)
+ case s: SubqueryAlias if s.getTagValue(SUBQUERY_TYPE_TAG).exists(_ ==
"tempView") =>
Review comment:
Thanks, will try that.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]