zeroshade commented on code in PR #524: URL: https://github.com/apache/iceberg-go/pull/524#discussion_r2353108892
########## manifest.go: ########## @@ -1546,18 +1689,39 @@ func (d *dataFile) initializeMapData() { d.fieldIDToPartitionData = make(map[int]any, len(d.PartitionData)) for k, v := range d.PartitionData { if id, ok := d.fieldNameToID[k]; ok { - d.fieldIDToPartitionData[id] = v + convertedValue := d.convertAvroValueToIcebergType(v, id) + d.fieldIDToPartitionData[id] = convertedValue } } } - d.fieldIDToPartitionData = avroPartitionData(d.fieldIDToPartitionData, d.fieldIDToLogicalType) }) } +func (d *dataFile) convertAvroValueToIcebergType(v any, fieldID int) any { + if t, ok := v.(time.Time); ok { + if logicalType, hasLogical := d.fieldIDToLogicalType[fieldID]; hasLogical { + switch logicalType { + case avro.Date: + days := int32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds())) + + return Date(days) + case avro.TimestampMicros: + return Timestamp(t.UTC().UnixMicro()) Review Comment: What about TimeMillis, TimeMicros, and TimestampMillis? It would make more sense to grab the old implementation of `avroPartitionData` and use that, since that is already what we were using for reading. 
########## manifest.go: ########## @@ -1546,18 +1689,39 @@ func (d *dataFile) initializeMapData() { d.fieldIDToPartitionData = make(map[int]any, len(d.PartitionData)) for k, v := range d.PartitionData { if id, ok := d.fieldNameToID[k]; ok { - d.fieldIDToPartitionData[id] = v + convertedValue := d.convertAvroValueToIcebergType(v, id) + d.fieldIDToPartitionData[id] = convertedValue } } } - d.fieldIDToPartitionData = avroPartitionData(d.fieldIDToPartitionData, d.fieldIDToLogicalType) }) } +func (d *dataFile) convertAvroValueToIcebergType(v any, fieldID int) any { + if t, ok := v.(time.Time); ok { + if logicalType, hasLogical := d.fieldIDToLogicalType[fieldID]; hasLogical { + switch logicalType { + case avro.Date: + days := int32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds())) + + return Date(days) + case avro.TimestampMicros: + return Timestamp(t.UTC().UnixMicro()) + } + } + days := int32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds())) + + return days Review Comment: This seems to be in the wrong spot. ########## manifest_test.go: ########## @@ -82,7 +82,7 @@ var ( Content: EntryContentEqDeletes, Path: "/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet", Format: ParquetFile, - PartitionData: map[string]any{"VendorID": int(1), "tpep_pickup_datetime": time.Unix(1925, 0).UnixMicro()}, + PartitionData: map[string]any{"VendorID": int(1), "tpep_pickup_datetime": time.Unix(1925, 0)}, Review Comment: The partition data should be a timestamp (a `UnixMicro` value, and potentially even the `iceberg.Timestamp` type), not a `time.Time`. We should only be converting to `time.Time` for writing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org