fqaiser94 commented on a change in pull request #29795:
URL: https://github.com/apache/spark/pull/29795#discussion_r492435473
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala
##########
@@ -39,19 +40,14 @@ object SimplifyExtractValueOps extends Rule[LogicalPlan] {
// Remove redundant field extraction.
case GetStructField(createNamedStruct: CreateNamedStruct, ordinal, _) =>
createNamedStruct.valExprs(ordinal)
- case GetStructField(w @ WithFields(struct, names, valExprs), ordinal,
maybeName) =>
- val name = w.dataType(ordinal).name
- val matches = names.zip(valExprs).filter(_._1 == name)
- if (matches.nonEmpty) {
- // return last matching element as that is the final value for the
field being extracted.
- // For example, if a user submits a query like this:
- // `$"struct_col".withField("b", lit(1)).withField("b",
lit(2)).getField("b")`
- // we want to return `lit(2)` (and not `lit(1)`).
- val expr = matches.last._2
- If(IsNull(struct), Literal(null, expr.dataType), expr)
- } else {
- GetStructField(struct, ordinal, maybeName)
- }
+ case GetStructField(updateFields: UpdateFields, ordinal, _) =>
+ val structExpr = updateFields.structExpr
+ updateFields.newExprs(ordinal) match {
+ // if the struct itself is null, then any value extracted from it
(expr) will be null
+ // so we don't need to wrap expr in If(IsNull(struct), Literal(null,
expr.dataType), expr)
+ case expr: GetStructField if expr.child.semanticEquals(structExpr) =>
expr
+ case expr => If(IsNull(ultimateStruct(structExpr)), Literal(null,
expr.dataType), expr)
Review comment:
IIUC you mean put `CombineUpdateFields` in a separate batch that runs
before the `SimplifyExtractValueOps` batch and remove the `ultimateStruct`
method?
If so, we'll miss out on some optimizations because after
`SimplifyExtractValueOps` runs we might again end up with code containing
`UpdateFields(UpdateFields(_, _))`. The `simplify add multiple nested fields to
struct` test in `complexTypesSuite` is a good example of a test that will fail
in this scenario.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]