bojana-db commented on code in PR #56703:
URL: https://github.com/apache/spark/pull/56703#discussion_r3504457757
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala:
##########
@@ -795,15 +795,149 @@ object VariantDelete {
if (v == null) {
None
} else {
- Some(ParsedDeletePath(
-
VariantExpressionEvalUtils.parseVariantDeletePath(v.asInstanceOf[UTF8String].toString)))
+ Some(ParsedDeletePath(VariantExpressionEvalUtils.parseVariantPath(
+ v.asInstanceOf[UTF8String].toString, "variant_delete")))
}
} else {
Some(DynamicDeletePath(child))
}
}
}
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(v, path, val) - Inserts a value into a variant at the given
JSONPath " +
+ "location. An object path adds a new field (error if it already exists);
an array path " +
+ "inserts at the index, shifting later elements right. Missing intermediate
keys are " +
+ "created. Returns NULL if any argument is NULL.",
+ arguments = """
+ Arguments:
+ * v - A variant value to mutate.
+ * path - A string expression evaluating to a JSONPath identifying the
insertion target. A
+ valid path should start with `$` and is followed by one or more
segments like `[123]`,
+ `.name`, `['name']`, or `["name"]`. The root path `$` is not allowed.
+ * val - Any expression castable to variant.
+ """,
+ examples = """
+ Examples:
+ > SELECT _FUNC_(parse_json('{"a": 1}'), '$.b', 2);
+ {"a":1,"b":2}
+ > SELECT _FUNC_(parse_json('{}'), '$.a.b', 1);
+ {"a":{"b":1}}
+ > SELECT _FUNC_(parse_json('["a","b","c"]'), '$[1]', 'z');
+ ["a","z","b","c"]
+ > SELECT _FUNC_(parse_json('["a","b"]'), '$[5]', 'z');
+ ["a","b",null,null,null,"z"]
+ > SELECT _FUNC_(parse_json('{}'), '$.a', parse_json('{"x":1}'));
+ {"a":{"x":1}}
+ > SELECT _FUNC_(parse_json('{}'), '$.a', parse_json('null'));
+ {"a":null}
+ > SELECT _FUNC_(parse_json('{}'), '$.a', null);
+ NULL
+ """,
+ since = "4.3.0",
+ group = "variant_funcs"
+)
+// scalastyle:on line.size.limit
+case class VariantInsert(input: Expression, path: Expression, value:
Expression)
+ extends TernaryExpression
+ with ExpectsInputTypes
+ with QueryErrorsBase {
+
+ override def first: Expression = input
+ override def second: Expression = path
+ override def third: Expression = value
+
+ override def nullIntolerant: Boolean = true
+
+ override def dataType: DataType = VariantType
+ override def inputTypes: Seq[AbstractDataType] =
+ Seq(VariantType, StringTypeWithCollation(supportsTrimCollation = true),
AnyDataType)
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ val result = super.checkInputDataTypes()
+ if (result.isFailure) {
+ result
+ } else if (value.dataType == NullType) {
+ TypeCheckResult.TypeCheckSuccess
+ } else if (!VariantGet.checkDataType(value.dataType, allowStructsAndMaps =
true)) {
Review Comment:
This is a good point, out of the two proposals I am fine with the second one
(I wouldn't force users to use variant input since it would require an explicit
cast even for plain scalars - e.g. `variant_insert(v, '$.a', cast(2 AS
VARIANT))`, which is an unnecessary degradation of user experience).
I defined value parameter as "any value castable to variant". If the
consensus is to consider value castable only if it satisfies `cast` operator
constraints
([link](https://docs.databricks.com/aws/en/sql/language-manual/functions/cast))
I am happy to make the change. Otherwise, documenting the struct/map behaviour
makes sense.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]