bojana-db commented on code in PR #56471:
URL: https://github.com/apache/spark/pull/56471#discussion_r3420584741


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala:
##########
@@ -662,6 +662,148 @@ object VariantGetExpressionBuilder extends 
VariantGetExpressionBuilderBase(true)
 // scalastyle:on line.size.limit
 object TryVariantGetExpressionBuilder extends 
VariantGetExpressionBuilderBase(false)
 
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+  usage = "_FUNC_(v, path1[, path2, ...]) - Removes fields or array elements 
from a variant at " +
+    "the given JSONPath locations. Multiple paths are applied left to right. 
Returns NULL if " +
+    "`v` is NULL; NULL paths are skipped.",
+  arguments = """
+    Arguments:
+      * v - A variant value to mutate.
+      * path1, path2, ... - One or more string expressions, each evaluating to 
a JSONPath
+          identifying a deletion target. A valid path should start with `$` 
and is followed by
+          one or more segments like `[123]`, `.name`, `['name']`, or 
`["name"]`. The root path
+          `$` is not allowed.
+  """,
+  examples = """
+    Examples:
+      > SELECT _FUNC_(parse_json('{"a": 1, "b": 2, "c": 3, "items": [1, 2, 
3]}'), NULL, '$.a', '$.c');
+       {"b":2,"items":[1,2,3]}
+      > SELECT _FUNC_(parse_json('{"a": 1, "b": 2, "c": 3, "items": [1, 2, 
3]}'), '$.missing');
+       {"a":1,"b":2,"c":3,"items":[1,2,3]}
+      > SELECT _FUNC_(parse_json('{"a": 1, "b": 2, "c": 3, "items": [1, 2, 
3]}'), '$.items[0]', '$.items[0]');
+       {"a":1,"b":2,"c":3,"items":[3]}
+      > SELECT _FUNC_(NULL, '$.a');
+       NULL
+  """,
+  since = "5.0.0",
+  group = "variant_funcs"
+)
+// scalastyle:on line.size.limit
+case class VariantDelete(children: Seq[Expression])
+    extends Expression
+    with ExpectsInputTypes {
+
+  override def dataType: DataType = VariantType
+
+  override def nullable: Boolean = children.headOption.forall(_.nullable)
+
+  override def inputTypes: Seq[AbstractDataType] = {
+    // First argument is the variant; subsequent arguments are JSONPath 
strings.
+    VariantType +: Seq.fill(math.max(children.length - 1, 0))(
+      StringTypeWithCollation(supportsTrimCollation = true))
+  }
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    if (children.length < 2) {
+      // `wrongNumArgsError` already quotes the function name via `toSQLId`, 
so pass the raw name.
+      throw QueryCompilationErrors.wrongNumArgsError(
+        prettyName, Seq("> 1"), children.length)
+    }
+    super.checkInputDataTypes()
+  }
+
+  private def variantChild: Expression = children.head
+  private def pathChildren: Seq[Expression] = children.tail
+
+  @transient private lazy val pathArgs: Seq[VariantDelete.DeletePathArg] =
+    pathChildren.flatMap(VariantDelete.toPathArg)
+
+  override def eval(input: InternalRow): Any = {
+    val inputVariant = variantChild.eval(input).asInstanceOf[VariantVal]
+    if (inputVariant == null) return null
+    var current = inputVariant
+    val args = pathArgs
+    var i = 0
+    while (i < args.length) {
+      args(i) match {
+        case parsed: VariantDelete.ParsedDeletePath =>
+          current = VariantExpressionEvalUtils.deleteAtPath(current, 
parsed.javaSegments)

Review Comment:
   Discussed offline; this won't be done as part of this PR. Leaving it as a 
possible follow-up.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to