Github user davies commented on a diff in the pull request:
https://github.com/apache/spark/pull/12720#discussion_r61353591
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
---
@@ -75,76 +77,63 @@ case class ScalarSubquery(
override def foldable: Boolean = false
override def nullable: Boolean = true
- override def withNewPlan(plan: LogicalPlan): ScalarSubquery =
ScalarSubquery(plan, exprId)
+ override def conditions: Seq[Expression] = conditionOption.toSeq.flatten
- override def toString: String = s"subquery#${exprId.id}"
+ override def withNewPlan(plan: LogicalPlan): ScalarSubquery = copy(query
= plan)
+
+ override def toString: String = s"subquery#${exprId.id} $conditionString"
}
/**
* A predicate subquery checks the existence of a value in a sub-query. We
currently only allow
* [[PredicateSubquery]] expressions within a Filter plan (i.e. WHERE or a
HAVING clause). This will
* be rewritten into a left semi/anti join during analysis.
*/
-abstract class PredicateSubquery extends SubqueryExpression with
Unevaluable with Predicate {
+case class PredicateSubquery(
+ query: LogicalPlan,
+ override val children: Seq[Expression] = Seq.empty,
+ nullAware: Boolean = false,
+ exprId: ExprId = NamedExpression.newExprId)
+ extends SubqueryExpression with Predicate with Unevaluable {
+ override lazy val resolved = childrenResolved && query.resolved
+ override lazy val references: AttributeSet = super.references --
query.outputSet
override def nullable: Boolean = false
+ override def conditions: Seq[Expression] = children
+ override def plan: LogicalPlan = SubqueryAlias(toString, query)
+ override def withNewPlan(plan: LogicalPlan): PredicateSubquery =
copy(query = plan)
+ override def toString: String = s"predicate-subquery#${exprId.id}
$conditionString"
}
object PredicateSubquery {
def hasPredicateSubquery(e: Expression): Boolean = {
- e.find(_.isInstanceOf[PredicateSubquery]).isDefined
+ e.find {
+ case _: PredicateSubquery | _: ListQuery | _: Exists => true
+ case _ => false
+ }.isDefined
}
}
/**
- * The [[InSubQuery]] predicate checks the existence of a value in a
sub-query. For example (SQL):
+ * A [[ListQuery]] expression defines the query which we want to search in
an IN subquery
+ * expression. It should and can only be used in conjunction with an IN
expression.
+ *
+ * For example (SQL):
* {{{
* SELECT *
* FROM a
* WHERE a.id IN (SELECT id
* FROM b)
* }}}
*/
-case class InSubQuery(
- value: Expression,
- query: LogicalPlan,
- exprId: ExprId = NamedExpression.newExprId) extends PredicateSubquery {
- override def children: Seq[Expression] = value :: Nil
- override lazy val resolved: Boolean = value.resolved && query.resolved
- override def withNewPlan(plan: LogicalPlan): InSubQuery =
InSubQuery(value, plan, exprId)
- override def plan: LogicalPlan = SubqueryAlias(s"subquery#${exprId.id}",
query)
-
- /**
- * The unwrapped value side expressions.
- */
- lazy val expressions: Seq[Expression] = value match {
- case CreateStruct(cols) => cols
- case col => Seq(col)
- }
-
- /**
- * Check if the number of columns and the data types on both sides match.
- */
- override def checkInputDataTypes(): TypeCheckResult = {
- // Check the number of arguments.
- if (expressions.length != query.output.length) {
- return TypeCheckResult.TypeCheckFailure(
- s"The number of fields in the value (${expressions.length}) does
not match with " +
- s"the number of columns in the subquery
(${query.output.length})")
- }
-
- // Check the argument types.
- expressions.zip(query.output).zipWithIndex.foreach {
- case ((e, a), i) if e.dataType != a.dataType =>
- return TypeCheckResult.TypeCheckFailure(
- s"The data type of value[$i] (${e.dataType}) does not match " +
- s"subquery column '${a.name}' (${a.dataType}).")
- case _ =>
- }
-
- TypeCheckResult.TypeCheckSuccess
- }
-
- override def toString: String = s"$value IN subquery#${exprId.id}"
+case class ListQuery(query: LogicalPlan, exprId: ExprId =
NamedExpression.newExprId)
+ extends SubqueryExpression with Unevaluable {
+ override lazy val resolved = false
+ override def dataType: DataType = ArrayType(NullType)
--- End diff --
For `In(a, Seq(listquery))`, `a` should have the same type as `listquery`,
right?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]