Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/21305#discussion_r207937834
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala ---
@@ -336,4 +337,124 @@ object DataType {
case (fromDataType, toDataType) => fromDataType == toDataType
}
}
+
+ private val SparkGeneratedName = """col\d+""".r
+ private def isSparkGeneratedName(name: String): Boolean = name match {
+ case SparkGeneratedName(_*) => true
+ case _ => false
+ }
+
+ /**
+ * Returns true if the write data type can be read using the read data
type.
+ *
+ * The write type is compatible with the read type if:
+ * - Both types are arrays, the array element types are compatible, and
element nullability is
+ * compatible (read allows nulls or write does not contain nulls).
+ * - Both types are maps and the map key and value types are compatible,
and value nullability
+ * is compatible (read allows nulls or write does not contain nulls).
+ * - Both types are structs and each field in the read struct is present
in the write struct and
+ * compatible (including nullability), or is nullable if the write
struct does not contain the
+ * field. Write-side structs are not compatible if they contain fields
that are not present in
+ * the read-side struct.
+ * - Both types are atomic and the write type can be safely cast to the
read type.
+ *
+ * Extra fields in write-side structs are not allowed to avoid
accidentally writing data that
+ * the read schema will not read, and to ensure map key equality is not
changed when data is read.
+ *
+ * @param write a write-side data type to validate against the read type
+ * @param read a read-side data type
+ * @return true if data written with the write type can be read using
the read type
+ */
+ def canWrite(
+ write: DataType,
+ read: DataType,
+ resolver: Resolver,
+ context: String,
+ addError: String => Unit = (_: String) => {}): Boolean = {
+ (write, read) match {
+ case (wArr: ArrayType, rArr: ArrayType) =>
+ // run compatibility check first to produce all error messages
+ val typesCompatible =
+ canWrite(wArr.elementType, rArr.elementType, resolver, context +
".element", addError)
+
+ if (wArr.containsNull && !rArr.containsNull) {
+ addError(s"Cannot write nullable elements to array of non-nulls:
'$context'")
+ false
+ } else {
+ typesCompatible
+ }
+
+ case (wMap: MapType, rMap: MapType) =>
+ // map keys cannot include data fields not in the read schema
without changing equality when
+ // read. map keys can be missing fields as long as they are
nullable in the read schema.
+
+ // run compatibility check first to produce all error messages
+ val keyCompatible =
+ canWrite(wMap.keyType, rMap.keyType, resolver, context + ".key",
addError)
+ val valueCompatible =
+ canWrite(wMap.valueType, rMap.valueType, resolver, context +
".value", addError)
+ val typesCompatible = keyCompatible && valueCompatible
+
+ if (wMap.valueContainsNull && !rMap.valueContainsNull) {
+ addError(s"Cannot write nullable values to map of non-nulls:
'$context'")
+ false
+ } else {
+ typesCompatible
+ }
+
+ case (StructType(writeFields), StructType(readFields)) =>
+ var fieldCompatible = true
+ readFields.zip(writeFields).foreach {
--- End diff --
Scala's `zip` doesn't check the lengths — it silently truncates to the shorter
collection, so extra fields in the longer struct would be ignored here. We should
return false if the lengths don't match, or call
[Seq#corresponds](https://www.scala-lang.org/api/current/scala/collection/Seq.html#corresponds[B](that:scala.collection.GenSeq[B])(p:(A,B)=%3EBoolean):Boolean),
which checks both length and the element-wise predicate.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]