Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/18875#discussion_r137997910
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala
---
@@ -26,20 +26,55 @@ import
org.apache.spark.sql.catalyst.expressions.SpecializedGetters
import org.apache.spark.sql.catalyst.util.{ArrayData, DateTimeUtils,
MapData}
import org.apache.spark.sql.types._
+/**
+ * `JacksonGenerator` can only be initialized with a `StructType` or a
`MapType`.
+ * Once it is initialized with `StructType`, it can be used to write out a
struct or an array of
+ * struct. Once it is initialized with `MapType`, it can be used to
write out a map or an array
+ * of map. An exception will be thrown if trying to write out a struct if
it is initialized with
+ * a `MapType`, and vice versa.
+ */
private[sql] class JacksonGenerator(
- schema: StructType,
+ dataType: DataType,
writer: Writer,
options: JSONOptions) {
// A `ValueWriter` is responsible for writing a field of an
`InternalRow` to appropriate
// JSON data. Here we are using `SpecializedGetters` rather than
`InternalRow` so that
// we can directly access data in `ArrayData` without the help of
`SpecificMutableRow`.
private type ValueWriter = (SpecializedGetters, Int) => Unit
+ // `JacksonGenerator` can only be initialized with a `StructType` or a
`MapType`.
+ dataType match {
+ case _: StructType | _: MapType =>
+ case _ => throw new UnsupportedOperationException(
+ s"`JacksonGenerator` only supports to be initialized with a
`StructType` " +
+ s"or `MapType` but got ${dataType.simpleString}")
+ }
+
// `ValueWriter`s for all fields of the schema
- private val rootFieldWriters: Array[ValueWriter] =
schema.map(_.dataType).map(makeWriter).toArray
+ private lazy val rootFieldWriters: Array[ValueWriter] = dataType match {
+ case st: StructType => st.map(_.dataType).map(makeWriter).toArray
+ case _ => throw new UnsupportedOperationException(
+ s"Initial type ${dataType.simpleString} must be a struct")
+ }
+
// `ValueWriter` for array data storing rows of the schema.
- private val arrElementWriter: ValueWriter = (arr: SpecializedGetters, i:
Int) => {
- writeObject(writeFields(arr.getStruct(i, schema.length), schema,
rootFieldWriters))
+ private lazy val arrElementWriter: ValueWriter = dataType match {
+ case st: StructType =>
+ (arr: SpecializedGetters, i: Int) => {
+ writeObject(writeFields(arr.getStruct(i, st.length), st,
rootFieldWriters))
+ }
+ case mt: MapType =>
+ (arr: SpecializedGetters, i: Int) => {
+ writeObject(writeMapData(arr.getMap(i), mt, mapElementWriter))
+ }
+ case _ => throw new UnsupportedOperationException(
--- End diff --
We don't need this default case. `dataType` can only be a `StructType` or a
`MapType`, because we already check it in the constructor.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]