zeroshade commented on a change in pull request #10071:
URL: https://github.com/apache/arrow/pull/10071#discussion_r619923761



##########
File path: go/parquet/schema/helpers.go
##########
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import "github.com/apache/arrow/go/parquet"
+
+// ListOf is a convenience helper function to create a properly structured
+// list structure according to the Parquet Spec.
+//
+// <list-repetition> group <name> (LIST) {
+//   repeated group list {
+//     <element-repetition> <element-type> element;
+//   }
+// }
+//
+// <list-repetition> can only be optional or required. panics if repeated.
+// <element-repetition> can only be optional or required. panics if repeated.
+func ListOf(n Node, rep parquet.Repetition, fieldID int32) *GroupNode {
+       if rep == parquet.Repetitions.Repeated || n.RepetitionType() == 
parquet.Repetitions.Repeated {
+               panic("parquet: listof repetition and element repetition must 
not be repeated.")
+       }
+       listName := n.Name()
+
+       switch n := n.(type) {
+       case *PrimitiveNode:
+               n.name = "element"
+       case *GroupNode:
+               n.name = "element"
+       }
+
+       return NewGroupNodeLogical(listName, rep, FieldList{
+               NewGroupNode("list", parquet.Repetitions.Repeated, 
FieldList{n}, -1)},

Review comment:
       done

##########
File path: go/parquet/schema/helpers.go
##########
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import "github.com/apache/arrow/go/parquet"
+
+// ListOf is a convenience helper function to create a properly structured
+// list structure according to the Parquet Spec.
+//
+// <list-repetition> group <name> (LIST) {
+//   repeated group list {
+//     <element-repetition> <element-type> element;
+//   }
+// }
+//
+// <list-repetition> can only be optional or required. panics if repeated.
+// <element-repetition> can only be optional or required. panics if repeated.
+func ListOf(n Node, rep parquet.Repetition, fieldID int32) *GroupNode {
+       if rep == parquet.Repetitions.Repeated || n.RepetitionType() == 
parquet.Repetitions.Repeated {
+               panic("parquet: listof repetition and element repetition must 
not be repeated.")
+       }
+       listName := n.Name()
+
+       switch n := n.(type) {
+       case *PrimitiveNode:
+               n.name = "element"
+       case *GroupNode:
+               n.name = "element"
+       }
+
+       return NewGroupNodeLogical(listName, rep, FieldList{
+               NewGroupNode("list", parquet.Repetitions.Repeated, 
FieldList{n}, -1)},
+               ListLogicalType{}, fieldID)
+}
+
+// MapOf is a convenience helper function to create a properly structured
+// parquet map node setup according to the Parquet Spec.
+//
+// <map-repetition> group <name> (MAP) {
+//   repeated group key_value {
+//     required <key-type> key;
+//     <value-repetition> <value-type> value;
+//   }
+// }
+//
+// key node will be renamed to "key", value node if not nil will be renamed to 
"value"
+//
+// <map-repetition> must be only optional or required. panics if repeated is 
passed.
+//
+// the key node *must* be required repetition. panics if optional or repeated
+//
+// value node can be nil (omitted) or have a repetition of required or 
optional *only*.
+// panics if value node is not nil and has a repetition of repeated.
+func MapOf(name string, key Node, value Node, mapRep parquet.Repetition, 
fieldID int32) *GroupNode {
+       if mapRep == parquet.Repetitions.Repeated {
+               panic("parquet: map repetition cannot be Repeated")
+       }
+       if key.RepetitionType() != parquet.Repetitions.Required {
+               panic("parquet: map key repetition must be Required")
+       }
+       if value != nil {
+               if value.RepetitionType() == parquet.Repetitions.Repeated {
+                       panic("parquet: map value cannot have repetition 
Repeated")
+               }
+               switch value := value.(type) {
+               case *PrimitiveNode:
+                       value.name = "value"
+               case *GroupNode:
+                       value.name = "value"
+               }
+       }
+
+       switch key := key.(type) {
+       case *PrimitiveNode:
+               key.name = "key"
+       case *GroupNode:
+               key.name = "key"
+       }
+
+       keyval := FieldList{key}
+       if value != nil {
+               keyval = append(keyval, value)
+       }
+
+       return NewGroupNodeLogical(name, mapRep, FieldList{
+               NewGroupNode("key_value", parquet.Repetitions.Repeated, keyval, 
-1),

Review comment:
       done

##########
File path: go/parquet/schema/logical_types_test.go
##########
@@ -0,0 +1,551 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema_test
+
+import (
+       "encoding/json"
+       "testing"
+
+       "github.com/apache/arrow/go/parquet"
+       "github.com/apache/arrow/go/parquet/schema"
+       "github.com/stretchr/testify/assert"
+)
+
+func TestConvertedLogicalEquivalences(t *testing.T) {
+       tests := []struct {
+               name      string
+               converted schema.ConvertedType
+               logical   schema.LogicalType
+               expected  schema.LogicalType
+       }{
+               {"utf8", schema.ConvertedTypes.UTF8, 
schema.StringLogicalType{}, schema.StringLogicalType{}},
+               {"map", schema.ConvertedTypes.Map, schema.MapLogicalType{}, 
schema.MapLogicalType{}},
+               {"mapkeyval", schema.ConvertedTypes.MapKeyValue, 
schema.MapLogicalType{}, schema.MapLogicalType{}},
+               {"list", schema.ConvertedTypes.List, 
schema.NewListLogicalType(), schema.NewListLogicalType()},
+               {"enum", schema.ConvertedTypes.Enum, schema.EnumLogicalType{}, 
schema.EnumLogicalType{}},
+               {"date", schema.ConvertedTypes.Date, schema.DateLogicalType{}, 
schema.DateLogicalType{}},
+               {"timemilli", schema.ConvertedTypes.TimeMillis, 
schema.NewTimeLogicalType(true, schema.TimeUnitMillis), 
&schema.TimeLogicalType{}},
+               {"timemicro", schema.ConvertedTypes.TimeMicros, 
schema.NewTimeLogicalType(true, schema.TimeUnitMicros), 
&schema.TimeLogicalType{}},
+               {"timestampmilli", schema.ConvertedTypes.TimestampMillis, 
schema.NewTimestampLogicalType(true, schema.TimeUnitMillis), 
&schema.TimestampLogicalType{}},
+               {"timestampmicro", schema.ConvertedTypes.TimestampMicros, 
schema.NewTimestampLogicalType(true, schema.TimeUnitMicros), 
&schema.TimestampLogicalType{}},
+               {"uint8", schema.ConvertedTypes.Uint8, 
schema.NewIntLogicalType(8, false), &schema.IntLogicalType{}},
+               {"uint16", schema.ConvertedTypes.Uint16, 
schema.NewIntLogicalType(16, false), &schema.IntLogicalType{}},
+               {"uint32", schema.ConvertedTypes.Uint32, 
schema.NewIntLogicalType(32, false), &schema.IntLogicalType{}},
+               {"uint64", schema.ConvertedTypes.Uint64, 
schema.NewIntLogicalType(64, false), &schema.IntLogicalType{}},
+               {"int8", schema.ConvertedTypes.Int8, 
schema.NewIntLogicalType(8, true), &schema.IntLogicalType{}},

Review comment:
       done

##########
File path: go/parquet/schema/node.go
##########
@@ -0,0 +1,591 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+       "github.com/apache/arrow/go/parquet"
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+       "github.com/apache/thrift/lib/go/thrift"
+       "golang.org/x/xerrors"
+)
+
+// NodeType describes whether the Node is a Primitive or Group node
+type NodeType int
+
+// the available constants for NodeType
+const (
+       Primitive NodeType = iota
+       Group
+)
+
+// Node is the interface for both Group and Primitive Nodes.
+// A logical schema type has a name, repetition level, and optionally
+// a logical type (ConvertedType in Parquet metadata parlance)

Review comment:
       fixed

##########
File path: go/parquet/schema/node.go
##########
@@ -0,0 +1,591 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+       "github.com/apache/arrow/go/parquet"
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+       "github.com/apache/thrift/lib/go/thrift"
+       "golang.org/x/xerrors"
+)
+
+// NodeType describes whether the Node is a Primitive or Group node
+type NodeType int
+
+// the available constants for NodeType
+const (
+       Primitive NodeType = iota
+       Group
+)
+
+// Node is the interface for both Group and Primitive Nodes.
+// A logical schema type has a name, repetition level, and optionally
+// a logical type (ConvertedType in Parquet metadata parlance)
+type Node interface {
+       Name() string
+       Type() NodeType
+       RepetitionType() parquet.Repetition
+       ConvertedType() ConvertedType
+       LogicalType() LogicalType
+       FieldID() int32
+       Parent() Node
+       SetParent(Node)
+       Path() string
+       Equals(Node) bool
+       Visit(v Visitor)
+       toThrift() *format.SchemaElement
+}
+
+// Visitor is an interface for creating functionality to walk the schema tree.
+//
+// A visitor can be passed to the Visit function of a Node in order to walk
+// the tree. VisitPre is called the first time a node is encountered. If
+// it is a group node, the return is checked and if it is false, the children
+// will be skipped.
+//
+// VisitPost is called after visiting any children
+type Visitor interface {
+       VisitPre(Node) bool
+       VisitPost(Node)
+}
+
+// ColumnPathFromNode walks the parents of the given node to construct its
+// column path
+func ColumnPathFromNode(n Node) parquet.ColumnPath {
+       if n == nil {
+               return nil
+       }
+
+       c := make([]string, 0)
+
+       cursor := n
+       for cursor.Parent() != nil {
+               c = append(c, cursor.Name())
+               cursor = cursor.Parent()
+       }
+
+       for i := len(c)/2 - 1; i >= 0; i-- {

Review comment:
       added comments




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to