tandonraghav commented on issue #2919:
URL: https://github.com/apache/hudi/issues/2919#issuecomment-860846241


   @n3nash @vinothchandar I am seeing that the entire schema is getting persisted in 
Glue **TBLPROPERTIES**. This was not the behaviour previously. Do we need the 
schema there as well, or can we have a config to switch it off?
   
   Hudi version - 0.9.0-SNAPSHOT
   
   ````
   hive> show create table max_ro;
   OK
   CREATE EXTERNAL TABLE `max_ro`(
     `_hoodie_commit_time` string, 
     `_hoodie_commit_seqno` string, 
     `_hoodie_record_key` string, 
     `_hoodie_partition_path` string, 
     `_hoodie_file_name` string, 
     `string_pincode_113` string, 
     `double_pincode_113` double, 
     `string_availability_157` string, 
     `string_availability2_169` string, 
     `string_availability3_150` string, 
     `string_availability4_158` string, 
     `string_availability5_187` string, 
     `string_availability6_150` string, 
     `string_availability7_778` string, 
     `string_availability8_192` string, 
     `string_availability9_700` string, 
     `string_availability10_131` string, 
     `string_availability11_186` string, 
     `string_availability12_878` string, 
     `string_availability13_466` string, 
     `id` string, 
     `product_id` string, 
     `catalog_id` string, 
     `feed_id` string, 
     `ts_ms` double, 
     `op` string)
   PARTITIONED BY ( 
     `db_name` string)
   ROW FORMAT SERDE 
     'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' 
   WITH SERDEPROPERTIES ( 
     'path'='file:/tmp/test/hudi-user-data/max') 
   STORED AS INPUTFORMAT 
     'org.apache.hudi.hadoop.HoodieParquetInputFormat' 
   OUTPUTFORMAT 
     'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
   LOCATION
     'file:/tmp/test/hudi-user-data/max'
   TBLPROPERTIES (
     'last_commit_time_sync'='20210614221425', 
     'last_modified_by'='raghav', 
     'last_modified_time'='1623689081', 
     'spark.sql.sources.provider'='hudi', 
     'spark.sql.sources.schema.numPartCols'='1', 
     'spark.sql.sources.schema.numParts'='1', 
     
     'spark.sql.sources.schema.part.0'='{"type":"struct","fields":[{"name":"_hoodie_commit_time","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_commit_seqno","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_record_key","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_partition_path","type":"string","nullable":true,"metadata":{}},{"name":"_hoodie_file_name","type":"string","nullable":true,"metadata":{}},{"name":"string_pincode_113","type":"string","nullable":true,"metadata":{}},{"name":"double_pincode_113","type":"double","nullable":true,"metadata":{}},{"name":"string_availability_157","type":"string","nullable":true,"metadata":{}},{"name":"string_availability2_169","type":"string","nullable":true,"metadata":{}},{"name":"string_availability3_150","type":"string","nullable":true,"metadata":{}},{"name":"string_availability4_158","type":"string","nullable":true,"metadata":{}},{"name":"string_availability5_187","type":"string","nullable":true,"metadata":{}},{"name":"string_availability6_150","type":"string","nullable":true,"metadata":{}},{"name":"string_availability7_778","type":"string","nullable":true,"metadata":{}},{"name":"string_availability8_192","type":"string","nullable":true,"metadata":{}},{"name":"string_availability9_700","type":"string","nullable":true,"metadata":{}},{"name":"string_availability10_131","type":"string","nullable":true,"metadata":{}},{"name":"string_availability11_186","type":"string","nullable":true,"metadata":{}},{"name":"string_availability12_878","type":"string","nullable":true,"metadata":{}},{"name":"string_availability13_466","type":"string","nullable":true,"metadata":{}},{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"product_id","type":"string","nullable":true,"metadata":{}},{"name":"catalog_id","type":"string","nullable":true,"metadata":{}},{"name":"feed_id","type":"string","nullable":true,"metadata":{}},{"name":"ts_ms","type":"double","nullable":true,"metadata":{}},{"name":"op","type":"string","nullable":true,"metadata":{}},{"name":"db_name","type":"string","nullable":true,"metadata":{}}]}', 
     'spark.sql.sources.schema.partCol.0'='db_name', 
     'transient_lastDdlTime'='1623689081')
   ````


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to