Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/21686#discussion_r199826429
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala
---
@@ -744,11 +747,42 @@ case class StructsToJson(
override def inputTypes: Seq[AbstractDataType] =
TypeCollection(ArrayType, StructType) :: Nil
}
+/**
+ * A function infers schema of JSON string.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(json[, options]) - Returns schema in the DDL format of
JSON string.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_('[{"col":0}]');
+ array<struct<col:int>>
+ """)
+case class SchemaOfJson(child: Expression)
+ extends UnaryExpression with String2StringExpression with
CodegenFallback {
+
+ private val jsonOptions = new JSONOptions(Map.empty, "UTC")
+ private val jsonFactory = new JsonFactory()
+
+ override def convert(v: UTF8String): UTF8String = {
+ val dt =
Utils.tryWithResource(CreateJacksonParser.utf8String(jsonFactory, v)) { parser
=>
+ parser.nextToken()
+ inferField(parser, jsonOptions)
+ }
+
+ UTF8String.fromString(dt.catalogString)
+ }
+}
+
object JsonExprUtils {
- def validateSchemaLiteral(exp: Expression): DataType = exp match {
+ def evalSchemaExpr(exp: Expression): DataType = exp match {
case Literal(s, StringType) => DataType.fromDDL(s.toString)
- case e => throw new AnalysisException(s"Expected a string literal
instead of $e")
+ case e @ SchemaOfJson(_: Literal) =>
+ val ddlSchema = e.eval().asInstanceOf[UTF8String]
+ DataType.fromDDL(ddlSchema.toString)
+ case e => throw new AnalysisException(
+ "Schema should be specified in DDL format as a string literal" +
+ s" or output of the schema_of_json function instead of $e")
--- End diff --
minor nit: `schema_of_json` -> `exp.prettyName`
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]