shawnding opened a new issue #463: Writing a DataFrame to a table after
adding an optional LongType column produces dirty data in the new
column
URL: https://github.com/apache/incubator-iceberg/issues/463
 
 
    import org.apache.spark.sql.SparkSession
    import org.apache.iceberg.hive.HiveCatalog
    import org.apache.iceberg.Schema
    import org.apache.iceberg.types.Types._
    import org.apache.iceberg.PartitionSpec
    import org.apache.iceberg.catalog.TableIdentifier
    import org.apache.log4j.Logger
    import java.sql.Timestamp
   
   // Table schema: a single optional long column named "id" (field id 1).
   val schema = new Schema(
            NestedField.optional(1, "id", LongType.get())
   )
   
   // Partition the table by the identity of the "id" column.
   val spec = PartitionSpec.builderFor(schema).identity("id").build()
   // `spark` is not defined in this snippet — presumably the session provided by spark-shell.
   val catalog = new HiveCatalog(spark.sparkContext.hadoopConfiguration)
   val table_name = "iceberg_partition_test_120"
   val name = TableIdentifier.of("default", table_name)
   // Create the table "default.<table_name>" with the schema and partition spec above.
   val table = catalog.createTable(name, schema, spec)
   
   import spark.implicits._
   
   // Row type for the test data. Note that `id` is an Int here, while the table
   // schema declares "id" as a long.
   // NOTE(review): this Int/long mismatch may be relevant to the reported behavior — unconfirmed.
   case class Record(id: Int)
   // Three rows with id = 1, 2, 3.
   val recordsDF = spark.createDataFrame((1 to 3).map(i => Record(i)))
   
// Append the rows to the Iceberg table, then read the table back and print it.
recordsDF.write.format("iceberg").mode("append").save(s"default.${table_name}")
   spark.read.format("iceberg").load(s"default.${table_name}").show
   
     **//---- output of show ----**
     +---+
     | id|
     +---+
     |  1|
     |  2|
     |  3|
     +---+
   
   // Evolve the schema of the existing table: add a long column named "phone".
   table.updateSchema().addColumn("phone", LongType.get()).commit();
   // Rows with id = 4, 5, 6. Record has no `phone` field, so these rows carry no phone value.
   // Re-declaring `recordsDF` suggests a REPL session (presumably spark-shell).
   val recordsDF = spark.createDataFrame((4 to 6).map(i => Record(i)))
   
// Append the new rows (which supply no "phone" value) and print the table again.
recordsDF.write.format("iceberg").mode("append").save(s"default.${table_name}")
   spark.read.format("iceberg").load(s"default.${table_name}").show
   
     **//---- output of show (the new column now contains dirty values) ----**
    +---+-----+
    | id|phone|
    +---+-----+
    |  4|    **5**|            `  _( dirty data)_`
    |  5|    **5**|            `  _( dirty data)_`
    |  6|    **5**|            `  _(dirty data )_`
    |  1| null|
    |  2| null|
    |  3| null|
    +---+-----+
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to