lw309637554 commented on a change in pull request #2776:
URL: https://github.com/apache/hudi/pull/2776#discussion_r608359051
##########
File path:
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala
##########
@@ -669,4 +669,90 @@ class TestCOWDataSource extends HoodieClientTestBase {
.load(basePath)
assertEquals(N + 1, hoodieIncViewDF1.count())
}
+
+ @Test def testSchemaEvolution(): Unit = {
+ // open the schema validate
+ val opts = commonOpts ++ Map("hoodie.avro.schema.validate" -> "true")
+ // 1. write records with schema1
+ val schema1 = StructType(StructField("_row_key", StringType, true) ::
StructField("name", StringType, true)::
+ StructField("timestamp", IntegerType, true) :: StructField("partition",
IntegerType, true)::Nil)
+ val records1 = Seq(Row("1", "Andy", 1, 1),
+ Row("2", "lisi", 1, 1),
+ Row("3", "zhangsan", 1, 1))
+ val rdd = jsc.parallelize(records1)
+ val recordsDF = spark.createDataFrame(rdd, schema1)
+ recordsDF.write.format("org.apache.hudi")
+ .options(opts)
+ .mode(SaveMode.Append)
+ .save(basePath)
+
+
+ // 2. write records with schema2 add column age
+ val schema2 = StructType(StructField("_row_key", StringType, true) ::
StructField("name", StringType, true) ::
+ StructField("age", StringType, true) :: StructField("timestamp",
IntegerType, true) ::
+ StructField("partition", IntegerType, true)::Nil)
+
+ val records2 = Seq(Row("11", "Andy", "10", 1, 1),
+ Row("22", "lisi", "11",1, 1),
+ Row("33", "zhangsan", "12", 1, 1))
+ val rdd2 = jsc.parallelize(records2)
+ val recordsDF2 = spark.createDataFrame(rdd2, schema2)
+ recordsDF2.write.format("org.apache.hudi")
Review comment:
Yes, it will throw a schema incompatibility exception.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]