[
https://issues.apache.org/jira/browse/SPARK-26246?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16723717#comment-16723717
]
ASF GitHub Bot commented on SPARK-26246:
asfgit closed pull request #23201: [SPARK-26246][SQL] Inferring TimestampType
from JSON
URL: https://github.com/apache/spark/pull/23201
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
index 263e05de32075..d1bc00c08c1c6 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala
@@ -28,7 +28,7 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.analysis.TypeCoercion
import org.apache.spark.sql.catalyst.expressions.ExprUtils
import org.apache.spark.sql.catalyst.json.JacksonUtils.nextUntil
-import org.apache.spark.sql.catalyst.util.{DropMalformedMode, FailFastMode,
ParseMode, PermissiveMode}
+import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.util.Utils
@@ -37,6 +37,12 @@ private[sql] class JsonInferSchema(options: JSONOptions)
extends Serializable {
private val decimalParser = ExprUtils.getDecimalParser(options.locale)
+ @transient
+ private lazy val timestampFormatter = TimestampFormatter(
+options.timestampFormat,
+options.timeZone,
+options.locale)
+
/**
* Infer the type of a collection of json records in three stages:
* 1. Infer the type of each record
@@ -115,13 +121,19 @@ private[sql] class JsonInferSchema(options: JSONOptions)
extends Serializable {
// record fields' types have been combined.
NullType
- case VALUE_STRING if options.prefersDecimal =>
+ case VALUE_STRING =>
+val field = parser.getText
val decimalTry = allCatch opt {
- val bigDecimal = decimalParser(parser.getText)
+ val bigDecimal = decimalParser(field)
DecimalType(bigDecimal.precision, bigDecimal.scale)
}
-decimalTry.getOrElse(StringType)
- case VALUE_STRING => StringType
+if (options.prefersDecimal && decimalTry.isDefined) {
+ decimalTry.get
+} else if ((allCatch opt timestampFormatter.parse(field)).isDefined) {
+ TimestampType
+} else {
+ StringType
+}
case START_OBJECT =>
val builder = Array.newBuilder[StructField]
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala
new file mode 100644
index 0000000000000..9307f9b47b807
--- /dev/null
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.json
+
+import com.fasterxml.jackson.core.JsonFactory
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.plans.SQLHelper
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types._
+
+class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper {
+
+ def checkType(options: Map[String, String], json: String, dt: DataType):
Unit = {
+val jsonOptions = new JSONOptions(options, "UTC", "")
+val inferSchema = new JsonInferSchema(jsonOptions)
+val factory = new JsonFactory()
+jsonOptions.setJacksonOptions(factory)
+val parser = CreateJacksonParser.string(factory, json)
+parser.nextToken()
+val expectedType = StructType(Seq(StructField("a", dt, true)))
+
+assert(inferSchema.inferField(parser) === expec