xiarixiaoyao commented on issue #8160: URL: https://github.com/apache/hudi/issues/8160#issuecomment-1465668173
@kapjoshi-cisco https://github.com/apache/hudi/pull/7326/files has already been merged; please set hoodie.datasource.write.reconcile.schema=true and it should work. Tested with the master branch — PASS ``` test("Test schema cc") { withTempDir { tmp => Seq("COPY_ON_WRITE").foreach { tableType => // for complex schema. val tableName = generateTableName val tablePath = s"${new Path(tmp.getCanonicalPath, tableName).toUri.toString}" if (HoodieSparkUtils.gteqSpark3_1) { val dataGen = new DataGenerator val inserts = convertToStringList(dataGen.generateInserts(10)) val df = spark.read.json(spark.sparkContext.parallelize(inserts, 2)).withColumn("longCol", lit(10L)) df.write.format("hudi"). options(getQuickstartWriteConfigs). option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, tableType). option(PRECOMBINE_FIELD_OPT_KEY, "ts"). option(RECORDKEY_FIELD_OPT_KEY, "uuid"). option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). option("hoodie.schema.on.read.enable","true"). option(TABLE_NAME.key(), tableName). option("hoodie.table.name", tableName). mode("overwrite"). save(tablePath) val updates = convertToStringList(dataGen.generateUpdates(10)) // type change: longCol (long -> double) // type change: fare (double -> String) // add new column and drop a column val dfUpdate = spark.read.json(spark.sparkContext.parallelize(updates, 2)) .withColumn("fare", expr("cast(fare as string)")) .withColumn("longCol", lit(0.01D)) dfUpdate.drop("begin_lat").write.format("hudi"). options(getQuickstartWriteConfigs). option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, tableType). option(PRECOMBINE_FIELD_OPT_KEY, "ts"). option(RECORDKEY_FIELD_OPT_KEY, "uuid"). option(PARTITIONPATH_FIELD_OPT_KEY, "partitionpath"). option("hoodie.schema.on.read.enable","true"). option("hoodie.datasource.write.reconcile.schema","true"). option(TABLE_NAME.key(), tableName). option("hoodie.table.name", tableName). mode("append"). 
save(tablePath) spark.sql("set hoodie.schema.on.read.enable=true") spark.read.format("hudi").load(tablePath).show(false) } } } } ``` Question 2: please set hoodie.schema.on.read.enable=true if you want to drop a column -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
