fqaiser94 commented on a change in pull request #29322:
URL: https://github.com/apache/spark/pull/29322#discussion_r469535542



##########
File path: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala
##########
@@ -541,57 +541,94 @@ case class StringToMap(text: Expression, pairDelim: 
Expression, keyValueDelim: E
 }
 
 /**
- * Adds/replaces field in struct by name.
+ * Represents an operation to be applied to the fields of a struct.
  */
-case class WithFields(
-    structExpr: Expression,
-    names: Seq[String],
-    valExprs: Seq[Expression]) extends Unevaluable {
+trait StructFieldsOperation {
 
-  assert(names.length == valExprs.length)
+  val resolver: Resolver = SQLConf.get.resolver
+
+  /**
+   * Returns an updated list of expressions which will ultimately be used as 
the children argument
+   * for [[CreateNamedStruct]].
+   */
+  def apply(exprs: Seq[(String, Expression)]): Seq[(String, Expression)]
+}
+
+/**
+ * Add or replace a field by name.
+ */
+case class WithField(name: String, valExpr: Expression)
+  extends Unevaluable with StructFieldsOperation {
+
+  override def apply(exprs: Seq[(String, Expression)]): Seq[(String, 
Expression)] =
+    if (exprs.exists(x => resolver(x._1, name))) {
+      exprs.map {
+        case (existingName, _) if resolver(existingName, name) => (name, 
valExpr)
+        case x => x
+      }
+    } else {
+      exprs :+ (name, valExpr)
+    }
+
+  override def children: Seq[Expression] = valExpr :: Nil
+
+  override def dataType: DataType = throw new UnresolvedException(this, 
"dataType")
+
+  override def nullable: Boolean = throw new UnresolvedException(this, 
"nullable")
+
+  override def prettyName: String = "WithField"

Review comment:
       I think we do have to override `prettyName`.
   
   For the following query: 
   ```
   sql("SELECT named_struct('a', 1, 'b', 2) struct_col")
   .select($"struct_col".withField("c", lit(3)).dropFields("b").as("struct_col"))
   .explain(true)
   ```
   1. With overriding, here's how it shows up in the parsed logical plan: 
   ```
   'Project [update_fields(update_fields('struct_col, WithField(c, 3)), DropField(b)) AS struct_col#2]
   ```
   2. Without overriding, here's how it shows up in the parsed logical plan (note that `withfield` is now lowercase, which is inconsistent with `DropField`): 
   ```
   'Project [update_fields(update_fields('struct_col, withfield(c, 3)), DropField(b)) AS struct_col#2]
   ```
   3. Alternatively, we could use snake_case names for both operations, matching `update_fields`: 
   ```
   'Project [update_fields(update_fields('struct_col, with_field(c, 3)), drop_field(b)) AS struct_col#2]
   ```
   Which do you prefer? 
   
   




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to