zeroshade commented on code in PR #524:
URL: https://github.com/apache/iceberg-go/pull/524#discussion_r2314393801


##########
manifest.go:
##########
@@ -1461,33 +1598,114 @@ func mapToAvroColMap[K comparable, V any](m map[K]V) 
*[]colMap[K, V] {
        return &out
 }
 
-func avroPartitionData(input map[int]any, logicalTypes 
map[int]avro.LogicalType) map[int]any {
+func avroPartitionData(input map[int]any, logicalTypes 
map[int]avro.LogicalType, fixedSizes map[int]int) map[int]any {
        out := make(map[int]any)
        for k, v := range input {
                if logical, ok := logicalTypes[k]; ok {
-                       switch logical {
-                       case avro.Date:
-                               out[k] = 
Date(v.(time.Time).Truncate(24*time.Hour).Unix() / int64((time.Hour * 
24).Seconds()))
-                       case avro.TimeMillis:
-                               out[k] = Time(v.(time.Duration).Milliseconds())
-                       case avro.TimeMicros:
-                               out[k] = Time(v.(time.Duration).Microseconds())
-                       case avro.TimestampMillis:
-                               out[k] = 
Timestamp(v.(time.Time).UTC().UnixMilli())
-                       case avro.TimestampMicros:
-                               out[k] = 
Timestamp(v.(time.Time).UTC().UnixMicro())
-                       default:
-                               out[k] = v
-                       }
-
-                       continue
+                       out[k] = convertLogicalTypeValue(v, logical, 
fixedSizes[k])
+               } else {
+                       out[k] = convertDefaultValue(v, fixedSizes[k])
                }
-               out[k] = v
        }
 
        return out
 }
 
+func convertLogicalTypeValue(v any, logicalType avro.LogicalType, fixedSize 
int) any {
+       switch logicalType {
+       case avro.Date:
+               return convertDateValue(v)
+       case avro.TimeMicros:
+               return convertTimeMicrosValue(v)
+       case avro.TimestampMicros:
+               return convertTimestampMicrosValue(v)
+       case avro.Decimal:
+               return convertDecimalValue(v, fixedSize)
+       default:
+               return v
+       }
+}
+
+func convertDateValue(v any) any {
+       if t, ok := v.(time.Time); ok {
+               return map[string]any{"int.date": 
int32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds()))}
+       }
+       if d, ok := v.(Date); ok {
+               return map[string]any{"int.date": int32(d)}
+       }
+
+       return v
+}
+
+func convertTimeMicrosValue(v any) any {
+       if t, ok := v.(Time); ok {
+               return map[string]any{"long.time-micros": int64(t)}
+       }
+       if d, ok := v.(time.Duration); ok {
+               return map[string]any{"long.time-micros": d.Microseconds()}
+       }
+
+       return v
+}
+
+func convertTimestampMicrosValue(v any) any {
+       if t, ok := v.(time.Time); ok {
+               return map[string]any{"long.timestamp-micros": 
t.UTC().UnixMicro()}
+       }
+       if ts, ok := v.(Timestamp); ok {
+               return map[string]any{"long.timestamp-micros": int64(ts)}
+       }
+
+       return v
+}
+
+func convertDecimalValue(v any, fixedSize int) any {
+       if v == nil {
+               return map[string]any{"null": nil}
+       }
+
+       dec, ok := v.(Decimal)
+       if !ok {
+               return v
+       }
+
+       bytes, err := DecimalLiteral(dec).MarshalBinary()
+       if err != nil {
+               return v
+       }
+       fixedArray := convertToFixedArray(padOrTruncateBytes(bytes, fixedSize), 
fixedSize)

Review Comment:
   Why convert to a fixed array instead of just returning the decimal type?



##########
manifest.go:
##########
@@ -1461,33 +1598,114 @@ func mapToAvroColMap[K comparable, V any](m map[K]V) 
*[]colMap[K, V] {
        return &out
 }
 
-func avroPartitionData(input map[int]any, logicalTypes 
map[int]avro.LogicalType) map[int]any {
+func avroPartitionData(input map[int]any, logicalTypes 
map[int]avro.LogicalType, fixedSizes map[int]int) map[int]any {
        out := make(map[int]any)
        for k, v := range input {
                if logical, ok := logicalTypes[k]; ok {
-                       switch logical {
-                       case avro.Date:
-                               out[k] = 
Date(v.(time.Time).Truncate(24*time.Hour).Unix() / int64((time.Hour * 
24).Seconds()))
-                       case avro.TimeMillis:
-                               out[k] = Time(v.(time.Duration).Milliseconds())
-                       case avro.TimeMicros:
-                               out[k] = Time(v.(time.Duration).Microseconds())
-                       case avro.TimestampMillis:
-                               out[k] = 
Timestamp(v.(time.Time).UTC().UnixMilli())
-                       case avro.TimestampMicros:
-                               out[k] = 
Timestamp(v.(time.Time).UTC().UnixMicro())
-                       default:
-                               out[k] = v
-                       }
-
-                       continue
+                       out[k] = convertLogicalTypeValue(v, logical, 
fixedSizes[k])
+               } else {
+                       out[k] = convertDefaultValue(v, fixedSizes[k])
                }
-               out[k] = v
        }
 
        return out
 }
 
+func convertLogicalTypeValue(v any, logicalType avro.LogicalType, fixedSize 
int) any {
+       switch logicalType {
+       case avro.Date:
+               return convertDateValue(v)
+       case avro.TimeMicros:
+               return convertTimeMicrosValue(v)
+       case avro.TimestampMicros:
+               return convertTimestampMicrosValue(v)
+       case avro.Decimal:
+               return convertDecimalValue(v, fixedSize)
+       default:
+               return v
+       }
+}
+
+func convertDateValue(v any) any {
+       if t, ok := v.(time.Time); ok {
+               return map[string]any{"int.date": 
int32(t.Truncate(24*time.Hour).Unix() / int64((time.Hour * 24).Seconds()))}
+       }
+       if d, ok := v.(Date); ok {
+               return map[string]any{"int.date": int32(d)}
+       }
+
+       return v
+}
+
+func convertTimeMicrosValue(v any) any {
+       if t, ok := v.(Time); ok {
+               return map[string]any{"long.time-micros": int64(t)}
+       }
+       if d, ok := v.(time.Duration); ok {
+               return map[string]any{"long.time-micros": d.Microseconds()}
+       }
+
+       return v
+}
+
+func convertTimestampMicrosValue(v any) any {
+       if t, ok := v.(time.Time); ok {
+               return map[string]any{"long.timestamp-micros": 
t.UTC().UnixMicro()}
+       }
+       if ts, ok := v.(Timestamp); ok {
+               return map[string]any{"long.timestamp-micros": int64(ts)}
+       }
+
+       return v
+}
+
+func convertDecimalValue(v any, fixedSize int) any {
+       if v == nil {
+               return map[string]any{"null": nil}
+       }
+
+       dec, ok := v.(Decimal)
+       if !ok {
+               return v
+       }
+
+       bytes, err := DecimalLiteral(dec).MarshalBinary()
+       if err != nil {
+               return v
+       }
+       fixedArray := convertToFixedArray(padOrTruncateBytes(bytes, fixedSize), 
fixedSize)
+
+       return map[string]any{"fixed": fixedArray}
+}
+
+func convertDefaultValue(v any, fixedSize int) any {
+       if uuidVal, ok := v.(uuid.UUID); ok {
+               return uuidVal.String()
+       }
+
+       if bytes, ok := v.([]byte); ok && fixedSize > 0 {
+               return convertToFixedArray(padOrTruncateBytes(bytes, 
fixedSize), fixedSize)

Review Comment:
   Why do we need to do this? Can't we just return the UUID?



##########
exprs.go:
##########
@@ -482,8 +482,33 @@ func (b *boundRef[T]) Equals(other BoundTerm) bool {
 }
 
 func (b *boundRef[T]) Ref() BoundReference { return b }
-func (b *boundRef[T]) Field() NestedField  { return b.field }
-func (b *boundRef[T]) Type() Type          { return b.field.Type }
+
+func unwrapLogicalTypeValue(v any) any {
+       if m, ok := v.(map[string]any); ok {
+               if val, exists := m["long.timestamp-micros"]; exists {
+                       if microseconds, ok := val.(int64); ok {
+                               return Timestamp(microseconds)
+                       }
+               }
+
+               if val, exists := m["int.date"]; exists {
+                       if days, ok := val.(int32); ok {
+                               return days
+                       }
+               }
+
+               if val, exists := m["long.time-micros"]; exists {
+                       if microseconds, ok := val.(int64); ok {
+                               return Time(microseconds)
+                       }
+               }
+       }
+
+       return v

Review Comment:
   This conversion should be happening before we ever get this far. What 
workflow would result in us getting here with a `map[string]any`?



##########
manifest.go:
##########
@@ -416,30 +429,53 @@ func getFieldIDMap(sc avro.Schema) (map[string]int, 
map[int]avro.LogicalType) {
 
        result := make(map[string]int)
        logicalTypes := make(map[int]avro.LogicalType)
+       fixedSizes := make(map[int]int)
+
        entryField := getField(sc.(*avro.RecordSchema), "data_file")
        partitionField := getField(entryField.Type().(*avro.RecordSchema), 
"partition")
 
        for _, field := range 
partitionField.Type().(*avro.RecordSchema).Fields() {
-               if fid, ok := field.Prop("field-id").(float64); ok {
-                       result[field.Name()] = int(fid)
-                       avroTyp := field.Type()
-                       if us, ok := avroTyp.(*avro.UnionSchema); ok {
-                               for _, t := range us.Types() {
-                                       avroTyp = t
-                               }
+               var fid int
+               switch v := field.Prop("field-id").(type) {

Review Comment:
   Does the `field-id` come back as a float instead of an `int` for some reason?



##########
table/arrow_utils.go:
##########
@@ -1030,11 +1031,19 @@ func (sc *schemaCompatVisitor) isFieldCompat(lhs 
iceberg.NestedField) bool {
 
 func (sc *schemaCompatVisitor) Schema(s *iceberg.Schema, v func() bool) bool {
        if !v() {
-               pterm.DisableColor()
-               tbl := 
pterm.DefaultTable.WithHasHeader(true).WithData(sc.errorData)
-               tbl.Render()
-               txt, _ := tbl.Srender()
-               pterm.EnableColor()
+               var lines []string
+               lines = append(lines, "   | Table Field              | 
Requested Field")
+
+               for i, row := range sc.errorData {
+                       if i == 0 {
+                               continue
+                       }
+                       if len(row) >= 3 {
+                               lines = append(lines, fmt.Sprintf("%s | %-24s | 
%s", row[0], row[1], row[2]))
+                       }
+               }

Review Comment:
   Why the change here?



##########
manifest.go:
##########
@@ -416,30 +429,53 @@ func getFieldIDMap(sc avro.Schema) (map[string]int, 
map[int]avro.LogicalType) {
 
        result := make(map[string]int)
        logicalTypes := make(map[int]avro.LogicalType)
+       fixedSizes := make(map[int]int)
+
        entryField := getField(sc.(*avro.RecordSchema), "data_file")
        partitionField := getField(entryField.Type().(*avro.RecordSchema), 
"partition")
 
        for _, field := range 
partitionField.Type().(*avro.RecordSchema).Fields() {
-               if fid, ok := field.Prop("field-id").(float64); ok {
-                       result[field.Name()] = int(fid)
-                       avroTyp := field.Type()
-                       if us, ok := avroTyp.(*avro.UnionSchema); ok {
-                               for _, t := range us.Types() {
-                                       avroTyp = t
-                               }
+               var fid int
+               switch v := field.Prop("field-id").(type) {
+               case int:
+                       fid = v
+               case int32:
+                       fid = int(v)
+               case int64:
+                       fid = int(v)
+               case float64:
+                       fid = int(v)
+               default:
+                       continue
+               }
+
+               result[field.Name()] = fid
+               avroTyp := field.Type()
+               if us, ok := avroTyp.(*avro.UnionSchema); ok {
+                       for _, t := range us.Types() {
+                               avroTyp = t
                        }

Review Comment:
   Should we confirm that we're not using the null type?



##########
manifest.go:
##########
@@ -960,6 +1003,37 @@ func (p *partitionFieldStats[T]) update(value any) (err 
error) {
        return nil
 }
 
+func extractBytesFromFixed(fixedBytes interface{}) []byte {
+       switch fb := fixedBytes.(type) {
+       case []interface{}:

Review Comment:
   In what situation does this happen, where we get `[]interface{}` instead 
of `[]byte`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to