zeroshade commented on code in PR #524:
URL: https://github.com/apache/iceberg-go/pull/524#discussion_r2353108892


##########
manifest.go:
##########
@@ -1546,18 +1689,39 @@ func (d *dataFile) initializeMapData() {
                        d.fieldIDToPartitionData = make(map[int]any, 
len(d.PartitionData))
                        for k, v := range d.PartitionData {
                                if id, ok := d.fieldNameToID[k]; ok {
-                                       d.fieldIDToPartitionData[id] = v
+                                       convertedValue := 
d.convertAvroValueToIcebergType(v, id)
+                                       d.fieldIDToPartitionData[id] = 
convertedValue
                                }
                        }
                }
-               d.fieldIDToPartitionData = 
avroPartitionData(d.fieldIDToPartitionData, d.fieldIDToLogicalType)
        })
 }
 
+func (d *dataFile) convertAvroValueToIcebergType(v any, fieldID int) any {
+       if t, ok := v.(time.Time); ok {
+               if logicalType, hasLogical := d.fieldIDToLogicalType[fieldID]; 
hasLogical {
+                       switch logicalType {
+                       case avro.Date:
+                               days := int32(t.Truncate(24*time.Hour).Unix() / 
int64((time.Hour * 24).Seconds()))
+
+                               return Date(days)
+                       case avro.TimestampMicros:
+                               return Timestamp(t.UTC().UnixMicro())

Review Comment:
   What about TimeMillis, TimeMicros, and TimestampMillis? It would make more 
sense to grab the old implementation of `avroPartitionData` and use that, since 
that is already what we were using for reading.



##########
manifest.go:
##########
@@ -1546,18 +1689,39 @@ func (d *dataFile) initializeMapData() {
                        d.fieldIDToPartitionData = make(map[int]any, 
len(d.PartitionData))
                        for k, v := range d.PartitionData {
                                if id, ok := d.fieldNameToID[k]; ok {
-                                       d.fieldIDToPartitionData[id] = v
+                                       convertedValue := 
d.convertAvroValueToIcebergType(v, id)
+                                       d.fieldIDToPartitionData[id] = 
convertedValue
                                }
                        }
                }
-               d.fieldIDToPartitionData = 
avroPartitionData(d.fieldIDToPartitionData, d.fieldIDToLogicalType)
        })
 }
 
+func (d *dataFile) convertAvroValueToIcebergType(v any, fieldID int) any {
+       if t, ok := v.(time.Time); ok {
+               if logicalType, hasLogical := d.fieldIDToLogicalType[fieldID]; 
hasLogical {
+                       switch logicalType {
+                       case avro.Date:
+                               days := int32(t.Truncate(24*time.Hour).Unix() / 
int64((time.Hour * 24).Seconds()))
+
+                               return Date(days)
+                       case avro.TimestampMicros:
+                               return Timestamp(t.UTC().UnixMicro())
+                       }
+               }
+               days := int32(t.Truncate(24*time.Hour).Unix() / 
int64((time.Hour * 24).Seconds()))
+
+               return days

Review Comment:
   This seems to be in the wrong spot.



##########
manifest_test.go:
##########
@@ -82,7 +82,7 @@ var (
                                Content:          EntryContentEqDeletes,
                                Path:             
"/home/iceberg/warehouse/nyc/taxis_partitioned/data/VendorID=null/00000-633-d8a4223e-dc97-45a1-86e1-adaba6e8abd7-00001.parquet",
                                Format:           ParquetFile,
-                               PartitionData:    map[string]any{"VendorID": 
int(1), "tpep_pickup_datetime": time.Unix(1925, 0).UnixMicro()},
+                               PartitionData:    map[string]any{"VendorID": 
int(1), "tpep_pickup_datetime": time.Unix(1925, 0)},

Review Comment:
   The partition data should be a timestamp (`UnixMicro`, and potentially even 
the `iceberg.Timestamp` type), not a `time.Time`. We should only be converting 
to `time.Time` for writing.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to