This is an automated email from the ASF dual-hosted git repository.
etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-go.git
The following commit(s) were added to refs/heads/main by this push:
new 39b0197 feat(partitioning): Add partition specs/fields and basic
transform interface (#2)
39b0197 is described below
commit 39b019774631030c97a074816c63da4d38f0113d
Author: Matt Topol <[email protected]>
AuthorDate: Thu Sep 14 12:21:52 2023 -0400
feat(partitioning): Add partition specs/fields and basic transform
interface (#2)
---
dev/.rat-excludes | 1 +
errors.go | 1 +
go.mod | 4 +-
go.sum | 18 ++++-
partitions.go | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++
partitions_test.go | 143 +++++++++++++++++++++++++++++++++
schema.go | 2 +-
transforms.go | 173 +++++++++++++++++++++++++++++++++++++++
transforms_test.go | 89 ++++++++++++++++++++
utils.go | 5 +-
10 files changed, 661 insertions(+), 7 deletions(-)
diff --git a/dev/.rat-excludes b/dev/.rat-excludes
index 785e1c9..b968f68 100644
--- a/dev/.rat-excludes
+++ b/dev/.rat-excludes
@@ -4,3 +4,4 @@ LICENSE
NOTICE
go.sum
build
+rat-results.txt
diff --git a/errors.go b/errors.go
index 801f5c5..c5bd870 100644
--- a/errors.go
+++ b/errors.go
@@ -24,4 +24,5 @@ var (
ErrNotImplemented = errors.New("not implemented")
ErrInvalidArgument = errors.New("invalid argument")
ErrInvalidSchema = errors.New("invalid schema")
+ ErrInvalidTransform = errors.New("invalid transform syntax")
)
diff --git a/go.mod b/go.mod
index 030d65e..6bea61d 100644
--- a/go.mod
+++ b/go.mod
@@ -21,11 +21,13 @@ go 1.20
require (
github.com/stretchr/testify v1.8.4
- golang.org/x/exp v0.0.0-20230811145659-89c5cff77bcb
+ golang.org/x/exp v0.0.0-20230905200255-921286631fa9
)
require (
github.com/davecgh/go-spew v1.1.1 // indirect
+ github.com/kr/pretty v0.3.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
+ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
diff --git a/go.sum b/go.sum
index 172fe17..460287c 100644
--- a/go.sum
+++ b/go.sum
@@ -1,12 +1,24 @@
+github.com/creack/pty v1.1.9/go.mod
h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.1
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/kr/pretty v0.2.1/go.mod
h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
+github.com/kr/pretty v0.3.1/go.mod
h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
+github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
+github.com/kr/text v0.1.0/go.mod
h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod
h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
+github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod
h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA=
github.com/pmezard/go-difflib v1.0.0
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod
h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/rogpeppe/go-internal v1.9.0
h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
+github.com/rogpeppe/go-internal v1.9.0/go.mod
h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/stretchr/testify v1.8.4
h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod
h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
-golang.org/x/exp v0.0.0-20230811145659-89c5cff77bcb
h1:mIKbk8weKhSeLH2GmUTrvx8CjkyJmnU1wFmg59CUjFA=
-golang.org/x/exp v0.0.0-20230811145659-89c5cff77bcb/go.mod
h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405
h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+golang.org/x/exp v0.0.0-20230905200255-921286631fa9
h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g=
+golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod
h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c
h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
+gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod
h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/partitions.go b/partitions.go
new file mode 100644
index 0000000..c24f082
--- /dev/null
+++ b/partitions.go
@@ -0,0 +1,232 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+ "encoding/json"
+ "fmt"
+ "strings"
+
+ "golang.org/x/exp/slices"
+)
+
+const (
+ partitionDataIDStart = 1000
+ InitialPartitionSpecID = 0
+)
+
+// UnpartitionedSpec is the default unpartitioned spec which can
+// be used for comparisons or to just provide a convenience for referencing
+// the same unpartitioned spec object.
+var UnpartitionedSpec = &PartitionSpec{id: 0}
+
+// PartitionField represents how one partition value is derived from the
+// source column by transformation.
+type PartitionField struct {
+ // SourceID is the source column id of the table's schema
+ SourceID int `json:"source-id"`
+ // FieldID is the partition field id across all the table partition
specs
+ FieldID int `json:"field-id"`
+ // Name is the name of the partition field itself
+ Name string `json:"name"`
+ // Transform is the transform used to produce the partition value
+ Transform Transform `json:"transform"`
+}
+
+func (p *PartitionField) String() string {
+ return fmt.Sprintf("%d: %s: %s(%d)", p.FieldID, p.Name, p.Transform,
p.SourceID)
+}
+
+func (p *PartitionField) UnmarshalJSON(b []byte) error {
+ type Alias PartitionField
+ aux := struct {
+ TransformString string `json:"transform"`
+ *Alias
+ }{
+ Alias: (*Alias)(p),
+ }
+
+ err := json.Unmarshal(b, &aux)
+ if err != nil {
+ return err
+ }
+
+ if p.Transform, err = ParseTransform(aux.TransformString); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// PartitionSpec captures the transformation from table data to partition
values
+type PartitionSpec struct {
+ // any change to a PartitionSpec will produce a new spec id
+ id int
+ fields []PartitionField
+
+ // this is populated by initialize after creation
+ sourceIdToFields map[int][]PartitionField
+}
+
+func NewPartitionSpec(fields ...PartitionField) PartitionSpec {
+ return NewPartitionSpecID(InitialPartitionSpecID, fields...)
+}
+
+func NewPartitionSpecID(id int, fields ...PartitionField) PartitionSpec {
+ ret := PartitionSpec{id: id, fields: fields}
+ ret.initialize()
+ return ret
+}
+
+// CompatibleWith returns true if this partition spec is considered
+// compatible with the passed in partition spec. This means that the two
+// specs have equivalent field lists regardless of the spec id.
+func (ps *PartitionSpec) CompatibleWith(other *PartitionSpec) bool {
+ if ps == other {
+ return true
+ }
+
+ if len(ps.fields) != len(other.fields) {
+ return false
+ }
+
+ return slices.EqualFunc(ps.fields, other.fields, func(left, right
PartitionField) bool {
+ return left.SourceID == right.SourceID && left.Name ==
right.Name &&
+ left.Transform == right.Transform
+ })
+}
+
+// Equals returns true iff the field lists are the same AND the spec id
+// is the same between this partition spec and the provided one.
+func (ps *PartitionSpec) Equals(other PartitionSpec) bool {
+ return ps.id == other.id && slices.Equal(ps.fields, other.fields)
+}
+
+func (ps PartitionSpec) MarshalJSON() ([]byte, error) {
+ if ps.fields == nil {
+ ps.fields = []PartitionField{}
+ }
+ return json.Marshal(struct {
+ ID int `json:"spec-id"`
+ Fields []PartitionField `json:"fields"`
+ }{ps.id, ps.fields})
+}
+
+func (ps *PartitionSpec) UnmarshalJSON(b []byte) error {
+ aux := struct {
+ ID int `json:"spec-id"`
+ Fields []PartitionField `json:"fields"`
+ }{ID: ps.id, Fields: ps.fields}
+
+ if err := json.Unmarshal(b, &aux); err != nil {
+ return err
+ }
+
+ ps.id, ps.fields = aux.ID, aux.Fields
+ ps.initialize()
+ return nil
+}
+
+func (ps *PartitionSpec) initialize() {
+ ps.sourceIdToFields = make(map[int][]PartitionField)
+ for _, f := range ps.fields {
+ ps.sourceIdToFields[f.SourceID] =
+ append(ps.sourceIdToFields[f.SourceID], f)
+ }
+}
+
+func (ps *PartitionSpec) ID() int { return ps.id }
+func (ps *PartitionSpec) NumFields() int { return len(ps.fields) }
+func (ps *PartitionSpec) Field(i int) PartitionField { return ps.fields[i] }
+
+func (ps *PartitionSpec) IsUnpartitioned() bool {
+ if len(ps.fields) == 0 {
+ return true
+ }
+
+ for _, f := range ps.fields {
+ if _, ok := f.Transform.(VoidTransform); !ok {
+ return false
+ }
+ }
+
+ return true
+}
+
+func (ps *PartitionSpec) FieldsBySourceID(fieldID int) []PartitionField {
+ return slices.Clone(ps.sourceIdToFields[fieldID])
+}
+
+func (ps PartitionSpec) String() string {
+ var b strings.Builder
+ b.WriteByte('[')
+ for i, f := range ps.fields {
+ if i == 0 {
+ b.WriteString("\n")
+ }
+ b.WriteString("\t")
+ b.WriteString(f.String())
+ b.WriteString("\n")
+ }
+ b.WriteByte(']')
+
+ return b.String()
+}
+
+func (ps *PartitionSpec) LastAssignedFieldID() int {
+ if len(ps.fields) == 0 {
+ return partitionDataIDStart - 1
+ }
+
+ id := ps.fields[0].FieldID
+ for _, f := range ps.fields[1:] {
+ if f.FieldID > id {
+ id = f.FieldID
+ }
+ }
+ return id
+}
+
+// PartitionType produces a struct of the partition spec.
+//
+// The partition fields should be optional:
+// - All partition transforms are required to produce null if the input value
+// is null. This can happen when the source column is optional.
+// - Partition fields may be added later, in which case not all files would
+// have the result field and it may be null.
+//
+// There is a case where we can guarantee that a partition field in the first
+// and only partition spec that uses a required source column will never be
+// null, but it doesn't seem worth tracking this case.
+func (ps *PartitionSpec) PartitionType(schema *Schema) *StructType {
+ nestedFields := []NestedField{}
+ for _, field := range ps.fields {
+ sourceType, ok := schema.FindTypeByID(field.SourceID)
+ if !ok {
+ continue
+ }
+ resultType := field.Transform.ResultType(sourceType)
+ nestedFields = append(nestedFields, NestedField{
+ ID: field.FieldID,
+ Name: field.Name,
+ Type: resultType,
+ Required: false,
+ })
+ }
+ return &StructType{FieldList: nestedFields}
+}
diff --git a/partitions_test.go b/partitions_test.go
new file mode 100644
index 0000000..fd29190
--- /dev/null
+++ b/partitions_test.go
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg_test
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/apache/iceberg-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestPartitionSpec(t *testing.T) {
+ assert.Equal(t, 999, iceberg.UnpartitionedSpec.LastAssignedFieldID())
+
+ bucket := iceberg.BucketTransform{NumBuckets: 4}
+ idField1 := iceberg.PartitionField{
+ SourceID: 3, FieldID: 1001, Name: "id", Transform: bucket}
+ spec1 := iceberg.NewPartitionSpec(idField1)
+
+ assert.Zero(t, spec1.ID())
+ assert.Equal(t, 1, spec1.NumFields())
+ assert.Equal(t, idField1, spec1.Field(0))
+ assert.NotEqual(t, idField1, spec1)
+ assert.False(t, spec1.IsUnpartitioned())
+ assert.True(t, spec1.CompatibleWith(&spec1))
+ assert.True(t, spec1.Equals(spec1))
+ assert.Equal(t, 1001, spec1.LastAssignedFieldID())
+ assert.Equal(t, "[\n\t1001: id: bucket[4](3)\n]", spec1.String())
+
+ // only differs by PartitionField FieldID
+ idField2 := iceberg.PartitionField{
+ SourceID: 3, FieldID: 1002, Name: "id", Transform: bucket}
+ spec2 := iceberg.NewPartitionSpec(idField2)
+
+ assert.False(t, spec1.Equals(spec2))
+ assert.True(t, spec1.CompatibleWith(&spec2))
+ assert.Equal(t, []iceberg.PartitionField{idField1},
spec1.FieldsBySourceID(3))
+ assert.Empty(t, spec1.FieldsBySourceID(1925))
+
+ spec3 := iceberg.NewPartitionSpec(idField1, idField2)
+ assert.False(t, spec1.CompatibleWith(&spec3))
+ assert.Equal(t, 1002, spec3.LastAssignedFieldID())
+}
+
+func TestUnpartitionedWithVoidField(t *testing.T) {
+ spec := iceberg.NewPartitionSpec(iceberg.PartitionField{
+ SourceID: 3, FieldID: 1001, Name: "void", Transform:
iceberg.VoidTransform{},
+ })
+
+ assert.True(t, spec.IsUnpartitioned())
+
+ spec2 := iceberg.NewPartitionSpec(iceberg.PartitionField{
+ SourceID: 3, FieldID: 1001, Name: "void", Transform:
iceberg.VoidTransform{},
+ }, iceberg.PartitionField{
+ SourceID: 3, FieldID: 1002, Name: "bucket", Transform:
iceberg.BucketTransform{NumBuckets: 2},
+ })
+
+ assert.False(t, spec2.IsUnpartitioned())
+}
+
+func TestSerializeUnpartitionedSpec(t *testing.T) {
+ data, err := json.Marshal(iceberg.UnpartitionedSpec)
+ require.NoError(t, err)
+
+ assert.JSONEq(t, `{"spec-id": 0, "fields": []}`, string(data))
+ assert.True(t, iceberg.UnpartitionedSpec.IsUnpartitioned())
+}
+
+func TestSerializePartitionSpec(t *testing.T) {
+ spec := iceberg.NewPartitionSpecID(3,
+ iceberg.PartitionField{SourceID: 1, FieldID: 1000,
+ Transform: iceberg.TruncateTransform{Width: 19}, Name:
"str_truncate"},
+ iceberg.PartitionField{SourceID: 2, FieldID: 1001,
+ Transform: iceberg.BucketTransform{NumBuckets: 25},
Name: "int_bucket"},
+ )
+
+ data, err := json.Marshal(spec)
+ require.NoError(t, err)
+
+ assert.JSONEq(t, `{
+ "spec-id": 3,
+ "fields": [
+ {
+ "source-id": 1,
+ "field-id": 1000,
+ "transform": "truncate[19]",
+ "name": "str_truncate"
+ },
+ {
+ "source-id": 2,
+ "field-id": 1001,
+ "transform": "bucket[25]",
+ "name": "int_bucket"
+ }
+ ]
+ }`, string(data))
+
+ var outspec iceberg.PartitionSpec
+ require.NoError(t, json.Unmarshal(data, &outspec))
+
+ assert.True(t, spec.Equals(outspec))
+}
+
+func TestPartitionType(t *testing.T) {
+ spec := iceberg.NewPartitionSpecID(3,
+ iceberg.PartitionField{SourceID: 1, FieldID: 1000,
+ Transform: iceberg.TruncateTransform{Width: 19}, Name:
"str_truncate"},
+ iceberg.PartitionField{SourceID: 2, FieldID: 1001,
+ Transform: iceberg.BucketTransform{NumBuckets: 25},
Name: "int_bucket"},
+ iceberg.PartitionField{SourceID: 3, FieldID: 1002,
+ Transform: iceberg.IdentityTransform{}, Name:
"bool_identity"},
+ iceberg.PartitionField{SourceID: 1, FieldID: 1003,
+ Transform: iceberg.VoidTransform{}, Name: "str_void"},
+ )
+
+ expected := &iceberg.StructType{
+ FieldList: []iceberg.NestedField{
+ {ID: 1000, Name: "str_truncate", Type:
iceberg.PrimitiveTypes.String},
+ {ID: 1001, Name: "int_bucket", Type:
iceberg.PrimitiveTypes.Int32},
+ {ID: 1002, Name: "bool_identity", Type:
iceberg.PrimitiveTypes.Bool},
+ {ID: 1003, Name: "str_void", Type:
iceberg.PrimitiveTypes.String},
+ },
+ }
+ actual := spec.PartitionType(tableSchemaSimple)
+ assert.Truef(t, expected.Equals(actual), "expected: %s, got: %s",
expected, actual)
+}
diff --git a/schema.go b/schema.go
index e9e559b..8edce1f 100644
--- a/schema.go
+++ b/schema.go
@@ -214,7 +214,7 @@ func (s *Schema) FindFieldByNameCaseInsensitive(name
string) (NestedField, bool)
return s.FindFieldByID(id)
}
-// FindFieldByID is like [*Schema.FindColumnByName], but returns the whole
+// FindFieldByID is like [*Schema.FindColumnName], but returns the whole
// field rather than just the field name.
func (s *Schema) FindFieldByID(id int) (NestedField, bool) {
idx, _ := s.lazyIDToField()
diff --git a/transforms.go b/transforms.go
new file mode 100644
index 0000000..58d2011
--- /dev/null
+++ b/transforms.go
@@ -0,0 +1,173 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg
+
+import (
+ "encoding"
+ "fmt"
+ "strconv"
+ "strings"
+)
+
+// ParseTransform takes the string representation of a transform as
+// defined in the iceberg spec, and produces the appropriate Transform
+// object or an error if the string is not a valid transform string.
+func ParseTransform(s string) (Transform, error) {
+ s = strings.ToLower(s)
+ switch {
+ case strings.HasPrefix(s, "bucket"):
+ matches := regexFromBrackets.FindStringSubmatch(s)
+ if len(matches) != 2 {
+ break
+ }
+
+ n, _ := strconv.Atoi(matches[1])
+ return BucketTransform{NumBuckets: n}, nil
+ case strings.HasPrefix(s, "truncate"):
+ matches := regexFromBrackets.FindStringSubmatch(s)
+ if len(matches) != 2 {
+ break
+ }
+
+ n, _ := strconv.Atoi(matches[1])
+ return TruncateTransform{Width: n}, nil
+ default:
+ switch s {
+ case "identity":
+ return IdentityTransform{}, nil
+ case "void":
+ return VoidTransform{}, nil
+ case "year":
+ return YearTransform{}, nil
+ case "month":
+ return MonthTransform{}, nil
+ case "day":
+ return DayTransform{}, nil
+ case "hour":
+ return HourTransform{}, nil
+ }
+ }
+
+ return nil, fmt.Errorf("%w: %s", ErrInvalidTransform, s)
+}
+
+// Transform is an interface for the various Transformation types
+// in partition specs. Currently, they do not yet provide actual
+// transformation functions or implementation. That will come later as
+// data reading gets implemented.
+type Transform interface {
+ fmt.Stringer
+ encoding.TextMarshaler
+ ResultType(t Type) Type
+}
+
+// IdentityTransform uses the identity function, performing no transformation
+// but instead partitioning on the value itself.
+type IdentityTransform struct{}
+
+func (t IdentityTransform) MarshalText() ([]byte, error) {
+ return []byte(t.String()), nil
+}
+
+func (IdentityTransform) String() string { return "identity" }
+
+func (IdentityTransform) ResultType(t Type) Type { return t }
+
+// VoidTransform is a transformation that always returns nil.
+type VoidTransform struct{}
+
+func (t VoidTransform) MarshalText() ([]byte, error) {
+ return []byte(t.String()), nil
+}
+
+func (VoidTransform) String() string { return "void" }
+
+func (VoidTransform) ResultType(t Type) Type { return t }
+
+// BucketTransform transforms values into a bucket partition value. It is
+// parameterized by a number of buckets. Bucket partition transforms use
+// a 32-bit hash of the source value to produce a positive value by mod
+// the bucket number.
+type BucketTransform struct {
+ NumBuckets int
+}
+
+func (t BucketTransform) MarshalText() ([]byte, error) {
+ return []byte(t.String()), nil
+}
+
+func (t BucketTransform) String() string { return fmt.Sprintf("bucket[%d]",
t.NumBuckets) }
+
+func (BucketTransform) ResultType(Type) Type { return PrimitiveTypes.Int32 }
+
+// TruncateTransform is a transformation for truncating a value to a specified
width.
+type TruncateTransform struct {
+ Width int
+}
+
+func (t TruncateTransform) MarshalText() ([]byte, error) {
+ return []byte(t.String()), nil
+}
+
+func (t TruncateTransform) String() string { return
fmt.Sprintf("truncate[%d]", t.Width) }
+
+func (TruncateTransform) ResultType(t Type) Type { return t }
+
+// YearTransform transforms a datetime value into a year value.
+type YearTransform struct{}
+
+func (t YearTransform) MarshalText() ([]byte, error) {
+ return []byte(t.String()), nil
+}
+
+func (YearTransform) String() string { return "year" }
+
+func (YearTransform) ResultType(Type) Type { return PrimitiveTypes.Int32 }
+
+// MonthTransform transforms a datetime value into a month value.
+type MonthTransform struct{}
+
+func (t MonthTransform) MarshalText() ([]byte, error) {
+ return []byte(t.String()), nil
+}
+
+func (MonthTransform) String() string { return "month" }
+
+func (MonthTransform) ResultType(Type) Type { return PrimitiveTypes.Int32 }
+
+// DayTransform transforms a datetime value into a date value.
+type DayTransform struct{}
+
+func (t DayTransform) MarshalText() ([]byte, error) {
+ return []byte(t.String()), nil
+}
+
+func (DayTransform) String() string { return "day" }
+
+func (DayTransform) ResultType(Type) Type { return PrimitiveTypes.Date }
+
+// HourTransform transforms a datetime value into an hour value.
+type HourTransform struct{}
+
+func (t HourTransform) MarshalText() ([]byte, error) {
+ return []byte(t.String()), nil
+}
+
+func (HourTransform) String() string { return "hour" }
+
+func (HourTransform) ResultType(Type) Type { return PrimitiveTypes.Int32 }
diff --git a/transforms_test.go b/transforms_test.go
new file mode 100644
index 0000000..a455ede
--- /dev/null
+++ b/transforms_test.go
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package iceberg_test
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/apache/iceberg-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestParseTransform(t *testing.T) {
+ tests := []struct {
+ toparse string
+ expected iceberg.Transform
+ }{
+ {"identity", iceberg.IdentityTransform{}},
+ {"IdEnTiTy", iceberg.IdentityTransform{}},
+ {"void", iceberg.VoidTransform{}},
+ {"VOId", iceberg.VoidTransform{}},
+ {"year", iceberg.YearTransform{}},
+ {"yEAr", iceberg.YearTransform{}},
+ {"month", iceberg.MonthTransform{}},
+ {"MONtH", iceberg.MonthTransform{}},
+ {"day", iceberg.DayTransform{}},
+ {"DaY", iceberg.DayTransform{}},
+ {"hour", iceberg.HourTransform{}},
+ {"hOuR", iceberg.HourTransform{}},
+ {"bucket[5]", iceberg.BucketTransform{NumBuckets: 5}},
+ {"bucket[100]", iceberg.BucketTransform{NumBuckets: 100}},
+ {"BUCKET[5]", iceberg.BucketTransform{NumBuckets: 5}},
+ {"bUCKeT[100]", iceberg.BucketTransform{NumBuckets: 100}},
+ {"truncate[10]", iceberg.TruncateTransform{Width: 10}},
+ {"truncate[255]", iceberg.TruncateTransform{Width: 255}},
+ {"TRUNCATE[10]", iceberg.TruncateTransform{Width: 10}},
+ {"tRuNCATe[255]", iceberg.TruncateTransform{Width: 255}},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.toparse, func(t *testing.T) {
+ transform, err := iceberg.ParseTransform(tt.toparse)
+ require.NoError(t, err)
+ assert.Equal(t, tt.expected, transform)
+
+ txt, err := transform.MarshalText()
+ assert.NoError(t, err)
+ assert.Equal(t, strings.ToLower(tt.toparse),
string(txt))
+ })
+ }
+
+ errorTests := []struct {
+ name string
+ toparse string
+ }{
+ {"foobar", "foobar"},
+ {"bucket no brackets", "bucket"},
+ {"truncate no brackets", "truncate"},
+ {"bucket no val", "bucket[]"},
+ {"truncate no val", "truncate[]"},
+ {"bucket neg", "bucket[-1]"},
+ {"truncate neg", "truncate[-1]"},
+ }
+
+ for _, tt := range errorTests {
+ t.Run(tt.name, func(t *testing.T) {
+ tr, err := iceberg.ParseTransform(tt.toparse)
+ assert.Nil(t, tr)
+ assert.ErrorIs(t, err, iceberg.ErrInvalidTransform)
+ assert.ErrorContains(t, err, tt.toparse)
+ })
+ }
+}
diff --git a/utils.go b/utils.go
index ccb6bbc..907a35f 100644
--- a/utils.go
+++ b/utils.go
@@ -19,6 +19,7 @@ package iceberg
import (
"runtime/debug"
+ "strings"
"golang.org/x/exp/constraints"
)
@@ -29,9 +30,9 @@ func init() {
version = "(unknown version)"
if info, ok := debug.ReadBuildInfo(); ok {
for _, dep := range info.Deps {
- switch {
- case dep.Path == "github.com/apache/iceberg/go/iceberg":
+ if strings.HasPrefix(dep.Path,
"github.com/apache/iceberg-go") {
version = dep.Version
+ break
}
}
}