badalprasadsingh commented on code in PR #524:
URL: https://github.com/apache/iceberg-go/pull/524#discussion_r2371167243


##########
manifest.go:
##########
@@ -1583,18 +1722,87 @@ func (d *dataFile) initializeMapData() {
                        d.fieldIDToPartitionData = make(map[int]any, 
len(d.PartitionData))
                        for k, v := range d.PartitionData {
                                if id, ok := d.fieldNameToID[k]; ok {
-                                       d.fieldIDToPartitionData[id] = v
+                                       convertedValue := 
d.convertAvroValueToIcebergType(v, id)
+                                       d.fieldIDToPartitionData[id] = 
convertedValue
                                }
                        }
                }
-               d.fieldIDToPartitionData = 
avroPartitionData(d.fieldIDToPartitionData, d.fieldIDToLogicalType)
        })
 }
 
+func (d *dataFile) convertAvroValueToIcebergType(v any, fieldID int) any {
+       if logicalType, ok := d.fieldIDToLogicalType[fieldID]; ok {
+               switch logicalType {
+               case avro.Date:
+                       if val, ok := v.(time.Time); ok {
+                               return Date(val.Truncate(24*time.Hour).Unix() / 
int64((time.Hour * 24).Seconds()))
+                       }
+
+                       return Date(v.(int32))
+               case avro.TimeMillis:
+                       if val, ok := v.(time.Duration); ok {
+                               return Time(val.Milliseconds())
+                       }
+
+                       return Time(v.(int64))
+               case avro.TimeMicros:
+                       if val, ok := v.(time.Duration); ok {
+                               return Time(val.Microseconds())
+                       }
+
+                       return Time(v.(int64))
+               case avro.TimestampMillis:
+                       if val, ok := v.(time.Time); ok {
+                               return Timestamp(val.UTC().UnixMilli())
+                       }
+
+                       return Timestamp(v.(int64))
+               case avro.TimestampMicros:
+                       if val, ok := v.(time.Time); ok {
+                               return Timestamp(val.UTC().UnixMicro())
+                       }
+
+                       return Timestamp(v.(int64))
+               case avro.Decimal:
+                       if unionMap, ok := v.(map[string]interface{}); ok {
+                               if val, ok := unionMap["fixed"]; ok {
+                                       if bigRatValue, ok := val.(*big.Rat); 
ok {
+                                               scale := 
d.fieldIDToFixedSize[fieldID]
+                                               scaleFactor := 
new(big.Int).Exp(big.NewInt(10), big.NewInt(int64(scale)), nil)
+                                               unscaled := 
new(big.Int).Mul(bigRatValue.Num(), scaleFactor)
+                                               unscaled = 
unscaled.Div(unscaled, bigRatValue.Denom())
+                                               decimal128Val := 
decimal128.FromBigInt(unscaled)
+
+                                               return DecimalLiteral{
+                                                       Scale: scale,
+                                                       Val:   decimal128Val,
+                                               }
+                                       }
+                               }
+                       }

Review Comment:
   Well, according to 
[`hamba/avro`](https://github.com/hamba/avro/blob/7f12625a3fc75716c58bc22cc3209bc54599aabc/README.md?plain=1#L95),
 for both `fixed.Decimal` and `bytes.Decimal`, a decimal is returned in the 
form of `*big.Rat`.  
   So, we have to convert it to `iceberg.Decimal` from `*big.Rat` anyway.
   
   One way to avoid `*big.Rat` is to define the Avro union schema of decimal as:
   
   ```json
   ["null", "bytes"]
   ````
   
   With this, we can simply use (scale is still needed to be stored while 
writing):
   
   ```go
   decLit.UnmarshalBinary(decoded.([]byte))
   ```
   
   But then it would no longer be a `logical` type, making it difficult to 
identify as a decimal schema while reading in the `getFieldIDMap()` function.
   
   So, should we do it? or, can we pick it up as a performance improvement 
later?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to