nastra commented on code in PR #8122: URL: https://github.com/apache/iceberg/pull/8122#discussion_r1272089101
########## go/iceberg/doc.go: ########## @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Package iceberg is an implementation of the +// [Iceberg table spec](https://iceberg.apache.org/spec/). Review Comment: nit: I think this can be removed ########## go/iceberg/types.go: ########## @@ -0,0 +1,582 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package iceberg + +import ( + "encoding/json" + "fmt" + "regexp" + "strconv" + "strings" + + "golang.org/x/exp/slices" +) + +var ( + regexFromBrackets = regexp.MustCompile(`^\w+\[(\d+)\]$`) + decimalRegex = regexp.MustCompile(`decimal\((\d+),\s*(\d+)\)`) Review Comment: I think this should align with the REGEX used in https://github.com/apache/iceberg/blob/776c6e6e61bc90573545f9dc788ee40eb451254d/api/src/main/java/org/apache/iceberg/types/Types.java#L58 (which allows spaces in between) ########## go/iceberg/types.go: ########## @@ -0,0 +1,582 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package iceberg + +import ( + "encoding/json" + "fmt" + "regexp" + "strconv" + "strings" + + "golang.org/x/exp/slices" +) + +var ( + regexFromBrackets = regexp.MustCompile(`^\w+\[(\d+)\]$`) + decimalRegex = regexp.MustCompile(`decimal\((\d+),\s*(\d+)\)`) +) + +type Properties map[string]string + +// Type is an interface representing any of the available iceberg types, +// such as primitives (int32/int64/etc.) or nested types (list/struct/map). 
+type Type interface { + fmt.Stringer + Type() string + Equals(Type) bool +} + +// NestedType is an interface that allows access to the child fields of +// a nested type such as a list/struct/map type. +type NestedType interface { + Type + Children() []NestedField +} + +type typeIFace struct { + Type +} + +func (t *typeIFace) MarshalJSON() ([]byte, error) { + return []byte(`"` + t.Type.Type() + `"`), nil +} + +func (t *typeIFace) UnmarshalJSON(b []byte) error { + var typename string + err := json.Unmarshal(b, &typename) + if err == nil { + switch typename { + case "boolean": + t.Type = BooleanType{} + case "int": + t.Type = Int32Type{} + case "long": + t.Type = Int64Type{} + case "float": + t.Type = Float32Type{} + case "double": + t.Type = Float64Type{} + case "date": + t.Type = DateType{} + case "time": + t.Type = TimeType{} + case "timestamp": + t.Type = TimestampType{} + case "timestamptz": + t.Type = TimestampTzType{} + case "string": + t.Type = StringType{} + case "uuid": + t.Type = UUIDType{} + case "binary": + t.Type = BinaryType{} + default: + switch { + case strings.HasPrefix(typename, "fixed"): Review Comment: should we use the same regex that we're using on the Java side? ########## go/iceberg/schema_test.go: ########## @@ -0,0 +1,633 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package iceberg_test + +import ( Review Comment: I think all types should be tested, similar to `TestSchemaConversions` ########## go/iceberg/README.md: ########## @@ -0,0 +1,26 @@ +<!-- + - Licensed to the Apache Software Foundation (ASF) under one or more + - contributor license agreements. See the NOTICE file distributed with + - this work for additional information regarding copyright ownership. + - The ASF licenses this file to You under the Apache License, Version 2.0 + - (the "License"); you may not use this file except in compliance with + - the License. You may obtain a copy of the License at + - + - http://www.apache.org/licenses/LICENSE-2.0 + - + - Unless required by applicable law or agreed to in writing, software + - distributed under the License is distributed on an "AS IS" BASIS, + - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + - See the License for the specific language governing permissions and + - limitations under the License. + --> + +# Iceberg Golang + +[](https://pkg.go.dev/github.com/apache/iceberg/go/iceberg) + +`iceberg` is a Golang implementation of the [Iceberg table spec](https://iceberg.apache.org/spec/). + +# Get in Touch + +- [Iceberg community](https://iceberg.apache.org/community/) Review Comment: I think it would be good to have a contributing section or file for Go so that people know how to set up IntelliJ and how to run tests and such (can be a follow-up) ########## go/iceberg/types.go: ########## @@ -0,0 +1,582 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package iceberg + +import ( + "encoding/json" + "fmt" + "regexp" + "strconv" + "strings" + + "golang.org/x/exp/slices" +) + +var ( + regexFromBrackets = regexp.MustCompile(`^\w+\[(\d+)\]$`) + decimalRegex = regexp.MustCompile(`decimal\((\d+),\s*(\d+)\)`) +) + +type Properties map[string]string + +// Type is an interface representing any of the available iceberg types, +// such as primitives (int32/int64/etc.) or nested types (list/struct/map). +type Type interface { + fmt.Stringer + Type() string + Equals(Type) bool +} + +// NestedType is an interface that allows access to the child fields of +// a nested type such as a list/struct/map type. 
+type NestedType interface { + Type + Children() []NestedField +} + +type typeIFace struct { + Type +} + +func (t *typeIFace) MarshalJSON() ([]byte, error) { + return []byte(`"` + t.Type.Type() + `"`), nil +} + +func (t *typeIFace) UnmarshalJSON(b []byte) error { + var typename string + err := json.Unmarshal(b, &typename) + if err == nil { + switch typename { + case "boolean": + t.Type = BooleanType{} + case "int": + t.Type = Int32Type{} + case "long": + t.Type = Int64Type{} + case "float": + t.Type = Float32Type{} + case "double": + t.Type = Float64Type{} + case "date": + t.Type = DateType{} + case "time": + t.Type = TimeType{} + case "timestamp": + t.Type = TimestampType{} + case "timestamptz": + t.Type = TimestampTzType{} + case "string": + t.Type = StringType{} + case "uuid": + t.Type = UUIDType{} + case "binary": + t.Type = BinaryType{} + default: + switch { + case strings.HasPrefix(typename, "fixed"): + matches := regexFromBrackets.FindStringSubmatch(typename) + if len(matches) != 2 { + return fmt.Errorf("%w: %s", ErrInvalidTypeString, typename) + } + + n, _ := strconv.Atoi(matches[1]) + t.Type = FixedType{len: n} + case strings.HasPrefix(typename, "decimal"): + matches := decimalRegex.FindStringSubmatch(typename) + if len(matches) != 3 { + return fmt.Errorf("%w: %s", ErrInvalidTypeString, typename) + } + + prec, _ := strconv.Atoi(matches[1]) + scale, _ := strconv.Atoi(matches[2]) + t.Type = DecimalType{precision: prec, scale: scale} + default: + return fmt.Errorf("%w: unrecognized field type", ErrInvalidSchema) + } + } + return nil + } + + aux := struct { + TypeName string `json:"type"` + }{} + if err = json.Unmarshal(b, &aux); err != nil { + return err + } + + switch aux.TypeName { + case "list": + t.Type = &ListType{} + case "map": + t.Type = &MapType{} + case "struct": + t.Type = &StructType{} + default: + return fmt.Errorf("%w: %s", ErrInvalidTypeString, aux.TypeName) + } + + return json.Unmarshal(b, t.Type) +} + +type NestedField struct { + Type 
`json:"-"` + + ID int `json:"id"` + Name string `json:"name"` + Required bool `json:"required"` + Doc string `json:"doc,omitempty"` + InitialDefault any `json:"initial-default,omitempty"` Review Comment: these 2 fields are from v3 of the spec, which neither Java nor Python has an implementation for. I'm OK keeping those, but just FYI -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
