chenhao-db commented on code in PR #45934:
URL: https://github.com/apache/spark/pull/45934#discussion_r1565144329
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala:
##########
@@ -489,3 +492,67 @@ object SchemaOfVariant {
def mergeSchema(t1: DataType, t2: DataType): DataType =
JsonInferSchema.compatibleType(t1, t2, VariantType)
}
+
+// scalastyle:off line.size.limit
+@ExpressionDescription(
+ usage = "_FUNC_(v) - Returns the merged schema in the SQL format of a
variant column.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(parse_json(j)) FROM VALUES ('1'), ('2'), ('3') AS tab(j);
+ BIGINT
+ > SELECT _FUNC_(parse_json(j)) FROM VALUES ('{"a": 1}'), ('{"b":
true}'), ('{"c": 1.23}') AS tab(j);
+ STRUCT<a: BIGINT, b: BOOLEAN, c: DECIMAL(3,2)>
+ """,
+ since = "4.0.0",
+ group = "variant_funcs")
+// scalastyle:on line.size.limit
+case class SchemaOfVariantAgg(
+ child: Expression,
+ override val mutableAggBufferOffset: Int,
+ override val inputAggBufferOffset: Int)
+ extends TypedImperativeAggregate[DataType]
+ with ExpectsInputTypes
+ with QueryErrorsBase
+ with UnaryLike[Expression] {
+ def this(child: Expression) = this(child, 0, 0)
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(VariantType)
+
+ override def dataType: DataType = StringType
+
+ override def nullable: Boolean = false
+
+ override def createAggregationBuffer(): DataType = NullType
+
+ override def update(buffer: DataType, input: InternalRow): DataType = {
+ val inputVariant = child.eval(input).asInstanceOf[VariantVal]
+ if (inputVariant != null) {
+ val v = new Variant(inputVariant.getValue, inputVariant.getMetadata)
+ SchemaOfVariant.mergeSchema(buffer, SchemaOfVariant.schemaOf(v))
+ } else {
+ buffer
+ }
+ }
+
+ override def merge(buffer: DataType, input: DataType): DataType =
+ SchemaOfVariant.mergeSchema(buffer, input)
+
+ override def eval(buffer: DataType): Any = UTF8String.fromString(buffer.sql)
+
+ override def serialize(buffer: DataType): Array[Byte] =
buffer.sql.getBytes("UTF-8")
Review Comment:
Makes sense. Changed to json instead.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]