zeroshade commented on code in PR #779:
URL: https://github.com/apache/iceberg-go/pull/779#discussion_r2943223059
##########
table/arrow_utils.go:
##########
@@ -715,12 +718,131 @@ func retOrPanic[T any](v T, err error) T {
return v
}
+// numericDefault converts v to T, accepting either the typed iceberg form or
+// the float64 that encoding/json produces when deserializing into any.
+func numericDefault[T ~int32 | ~int64 | ~float32 | ~float64](v any) T {
+ switch val := v.(type) {
+ case T:
+ return val
+ case float64:
+ return T(val)
+ }
Review Comment:
Do we always get either the `T` we wanted or a `float64` here?
##########
table/arrow_utils.go:
##########
@@ -715,12 +718,131 @@ func retOrPanic[T any](v T, err error) T {
return v
}
+// numericDefault converts v to T, accepting either the typed iceberg form or
+// the float64 that encoding/json produces when deserializing into any.
+func numericDefault[T ~int32 | ~int64 | ~float32 | ~float64](v any) T {
+ switch val := v.(type) {
+ case T:
+ return val
+ case float64:
+ return T(val)
+ }
+ panic(fmt.Errorf("unsupported write-default value type %T for numeric
iceberg type", v))
+}
+
+// defaultToScalar converts an Iceberg default value to an Arrow scalar.
+func defaultToScalar(v any, t iceberg.Type, dt arrow.DataType) scalar.Scalar {
+ switch typ := t.(type) {
+ case iceberg.Float32Type:
+ s, err := scalar.MakeScalarParam(numericDefault[float32](v), dt)
+ if err != nil {
+ panic(fmt.Errorf("write-default float32 (iceberg type
%s, value %v %T): %w", t, v, v, err))
+ }
+
+ return s
+ case iceberg.DateType:
+ return
scalar.NewDate32Scalar(arrow.Date32(numericDefault[iceberg.Date](v)))
+ case iceberg.TimeType:
+ return
scalar.NewTime64Scalar(arrow.Time64(numericDefault[iceberg.Time](v)), dt)
+ case iceberg.TimestampType, iceberg.TimestampTzType:
+ return
scalar.NewTimestampScalar(arrow.Timestamp(numericDefault[iceberg.Timestamp](v)),
dt)
+ case iceberg.TimestampNsType, iceberg.TimestampTzNsType:
+ return
scalar.NewTimestampScalar(arrow.Timestamp(numericDefault[iceberg.TimestampNano](v)),
dt)
+ case iceberg.UUIDType:
+ switch val := v.(type) {
+ case uuid.UUID:
+ s, err := scalar.MakeScalarParam(val[:],
&arrow.FixedSizeBinaryType{ByteWidth: 16})
+ if err != nil {
+ panic(fmt.Errorf("write-default uuid (value
%v): %w", val, err))
+ }
+
+ return s
+ case string:
+ u, err := uuid.Parse(val)
+ if err != nil {
+ panic(fmt.Errorf("write-default uuid: cannot
parse string %q: %w", val, err))
+ }
+ s, err := scalar.MakeScalarParam(u[:],
&arrow.FixedSizeBinaryType{ByteWidth: 16})
+ if err != nil {
+ panic(fmt.Errorf("write-default uuid (value
%v): %w", val, err))
+ }
+
+ return s
+ }
+ panic(fmt.Errorf("write-default uuid: unsupported value type %T
(%v)", v, v))
+ case iceberg.DecimalType:
+ switch val := v.(type) {
+ case iceberg.Decimal:
+ return scalar.NewDecimal128Scalar(val.Val, dt)
+ case string:
+ n, err := decimal128.FromString(val,
int32(typ.Precision()), int32(typ.Scale()))
+ if err != nil {
+ panic(fmt.Errorf("write-default decimal(p=%d,
s=%d): cannot parse string %q: %w", typ.Precision(), typ.Scale(), val, err))
+ }
+
+ return scalar.NewDecimal128Scalar(n, dt)
+ }
+ panic(fmt.Errorf("write-default decimal: unsupported value type
%T (%v)", v, v))
+ case iceberg.BinaryType, iceberg.FixedType:
+ switch val := v.(type) {
+ case []byte:
+ s, err := scalar.MakeScalarParam(val, dt)
+ if err != nil {
+ panic(fmt.Errorf("write-default binary/fixed
(iceberg type %s, value %v): %w", t, val, err))
+ }
+
+ return s
+ case string:
+ b, err := base64.StdEncoding.DecodeString(val)
+ if err != nil {
+ panic(fmt.Errorf("write-default binary/fixed
(iceberg type %s): cannot base64-decode string %q: %w", t, val, err))
+ }
+ s, err := scalar.MakeScalarParam(b, dt)
+ if err != nil {
+ panic(fmt.Errorf("write-default binary/fixed
(iceberg type %s, value %v): %w", t, b, err))
+ }
+
+ return s
+ }
+ panic(fmt.Errorf("write-default binary/fixed: unsupported value
type %T (%v)", v, v))
+ // Float64, Bool, and String cast normally.
+ // Int32 and Int64 arrive as float64 from JSON and are handled by
MakeScalarParam.
+ default:
+ s, err := scalar.MakeScalarParam(v, dt)
+ if err != nil {
+ panic(fmt.Errorf("write-default (iceberg type %s, value
%v %T): %w", t, v, v, err))
+ }
+
+ return s
+ }
+}
+
+// defaultToArray creates an Arrow array of length n filled with the given
default value v.
+func defaultToArray(v any, t iceberg.Type, dt arrow.DataType, n int, alloc
memory.Allocator) arrow.Array {
+ sc := defaultToScalar(v, t, dt)
+ out, err := scalar.MakeArrayFromScalar(sc, n, alloc)
+ if err != nil {
+ panic(fmt.Errorf("write-default (iceberg type %s, value %v %T):
failed to create array: %w", t, v, v, err))
+ }
+ if _, ok := dt.(*extensions.UUIDType); ok {
+ defer out.Release()
+
+ data := array.NewData(dt, out.Len(), out.Data().Buffers(), nil,
out.NullN(), 0)
+ defer data.Release()
+
+ return array.MakeFromData(data)
+ }
Review Comment:
why do we need this?
##########
table/rolling_data_writer.go:
##########
@@ -332,7 +332,7 @@ func (r *RollingDataWriter) stream(outputDataFilesCh chan<-
iceberg.DataFile) {
}
converted, err := ToRequestedSchema(r.ctx, r.factory.fileSchema,
- r.factory.taskSchema, record, false, true, false)
+ r.factory.taskSchema, record, false, true, false, true)
Review Comment:
Okay, we REALLY need to convert `ToRequestedSchema` to take a struct or
something instead of this collection of bools, so we know what each one actually
stands for at call sites.
##########
table/arrow_utils.go:
##########
@@ -715,12 +718,131 @@ func retOrPanic[T any](v T, err error) T {
return v
}
+// numericDefault converts v to T, accepting either the typed iceberg form or
+// the float64 that encoding/json produces when deserializing into any.
+func numericDefault[T ~int32 | ~int64 | ~float32 | ~float64](v any) T {
+ switch val := v.(type) {
+ case T:
+ return val
+ case float64:
+ return T(val)
+ }
+ panic(fmt.Errorf("unsupported write-default value type %T for numeric
iceberg type", v))
+}
+
+// defaultToScalar converts an Iceberg default value to an Arrow scalar.
+func defaultToScalar(v any, t iceberg.Type, dt arrow.DataType) scalar.Scalar {
Review Comment:
It might make sense to fix `MakeScalarParam` for some of these cases instead,
to simplify this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]