zeroshade commented on a change in pull request #10071:
URL: https://github.com/apache/arrow/pull/10071#discussion_r619923697



##########
File path: go/parquet/schema/column.go
##########
@@ -0,0 +1,110 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+       "fmt"
+       "strings"
+
+       "github.com/apache/arrow/go/parquet"
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+       "golang.org/x/xerrors"
+)
+
+// Column encapsulates the information necessary to interpret primitive
+// column data in the context of a particular schema. We have to examine
+// the node structure of a column's path to the root in the schema tree
+// to be able to reassemble the nested structure from the repetition and
+// definition levels.
+type Column struct {
+       pnode *PrimitiveNode
+       // the maximum definition level in this column
+       maxDefLvl int16
+       // the maximum repetition level in this column

Review comment:
       updated

##########
File path: go/parquet/schema/converted_types.go
##########
@@ -0,0 +1,191 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+)
+
+// ConvertedType corresponds to the ConvertedType in the parquet.Thrift,
+// with added values of None and NA for handling when these values are not
+// set in the metadata
+type ConvertedType format.ConvertedType
+
+var (
+       // ConvertedTypes is a struct containing the constants for the types
+       // to make it easy to reference them while making it clear what they are
+       ConvertedTypes = struct {
+               None            ConvertedType
+               UTF8            ConvertedType
+               Map             ConvertedType
+               MapKeyValue     ConvertedType
+               List            ConvertedType
+               Enum            ConvertedType
+               Decimal         ConvertedType
+               Date            ConvertedType
+               TimeMillis      ConvertedType
+               TimeMicros      ConvertedType
+               TimestampMillis ConvertedType
+               TimestampMicros ConvertedType
+               Uint8           ConvertedType
+               Uint16          ConvertedType
+               Uint32          ConvertedType
+               Uint64          ConvertedType
+               Int8            ConvertedType
+               Int16           ConvertedType
+               Int32           ConvertedType
+               Int64           ConvertedType
+               JSON            ConvertedType
+               BSON            ConvertedType
+               Interval        ConvertedType
+               NA              ConvertedType
+       }{
+               None:            -1, // thrift enum starts at 0, so we know this will not be used
+               UTF8:            ConvertedType(format.ConvertedType_UTF8),
+               Map:             ConvertedType(format.ConvertedType_MAP),
+               MapKeyValue:     ConvertedType(format.ConvertedType_MAP_KEY_VALUE),
+               List:            ConvertedType(format.ConvertedType_LIST),
+               Enum:            ConvertedType(format.ConvertedType_ENUM),
+               Decimal:         ConvertedType(format.ConvertedType_DECIMAL),
+               Date:            ConvertedType(format.ConvertedType_DATE),
+               TimeMillis:      ConvertedType(format.ConvertedType_TIME_MILLIS),
+               TimeMicros:      ConvertedType(format.ConvertedType_TIME_MICROS),
+               TimestampMillis: ConvertedType(format.ConvertedType_TIMESTAMP_MILLIS),
+               TimestampMicros: ConvertedType(format.ConvertedType_TIMESTAMP_MICROS),
+               Uint8:           ConvertedType(format.ConvertedType_UINT_8),
+               Uint16:          ConvertedType(format.ConvertedType_UINT_16),
+               Uint32:          ConvertedType(format.ConvertedType_UINT_32),
+               Uint64:          ConvertedType(format.ConvertedType_UINT_64),
+               Int8:            ConvertedType(format.ConvertedType_INT_8),
+               Int16:           ConvertedType(format.ConvertedType_INT_16),
+               Int32:           ConvertedType(format.ConvertedType_INT_32),
+               Int64:           ConvertedType(format.ConvertedType_INT_64),
+               JSON:            ConvertedType(format.ConvertedType_JSON),
+               BSON:            ConvertedType(format.ConvertedType_BSON),
+               Interval:        ConvertedType(format.ConvertedType_INTERVAL),
+               NA:              24, // should always be the last values after Interval
+       }
+)
+
+func (p ConvertedType) String() string {
+       switch p {
+       case ConvertedTypes.None:
+               return "NONE"
+       case ConvertedTypes.NA:
+               return "UNKNOWN"
+       default:
+               return format.ConvertedType(p).String()
+       }
+}
+
+// ToLogicalType returns the correct LogicalType for the given ConvertedType, using the decimal
+// metadata provided to define the precision/scale if necessary
+func (p ConvertedType) ToLogicalType(convertedDecimal DecimalMetadata) LogicalType {
+       switch p {
+       case ConvertedTypes.UTF8:
+               return StringLogicalType{}
+       case ConvertedTypes.Map, ConvertedTypes.MapKeyValue:
+               return MapLogicalType{}
+       case ConvertedTypes.List:
+               return ListLogicalType{}
+       case ConvertedTypes.Enum:
+               return EnumLogicalType{}
+       case ConvertedTypes.Decimal:
+               return NewDecimalLogicalType(convertedDecimal.Precision, convertedDecimal.Scale)
+       case ConvertedTypes.Date:
+               return DateLogicalType{}
+       case ConvertedTypes.TimeMillis:
+               return NewTimeLogicalType(true, TimeUnitMillis)

Review comment:
       done

##########
File path: go/parquet/schema/converted_types.go
##########
@@ -0,0 +1,191 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+)
+
+// ConvertedType corresponds to the ConvertedType in the parquet.Thrift,
+// with added values of None and NA for handling when these values are not
+// set in the metadata
+type ConvertedType format.ConvertedType
+
+var (
+       // ConvertedTypes is a struct containing the constants for the types
+       // to make it easy to reference them while making it clear what they are
+       ConvertedTypes = struct {
+               None            ConvertedType
+               UTF8            ConvertedType
+               Map             ConvertedType
+               MapKeyValue     ConvertedType
+               List            ConvertedType
+               Enum            ConvertedType
+               Decimal         ConvertedType
+               Date            ConvertedType
+               TimeMillis      ConvertedType
+               TimeMicros      ConvertedType
+               TimestampMillis ConvertedType
+               TimestampMicros ConvertedType
+               Uint8           ConvertedType
+               Uint16          ConvertedType
+               Uint32          ConvertedType
+               Uint64          ConvertedType
+               Int8            ConvertedType
+               Int16           ConvertedType
+               Int32           ConvertedType
+               Int64           ConvertedType
+               JSON            ConvertedType
+               BSON            ConvertedType
+               Interval        ConvertedType
+               NA              ConvertedType
+       }{
+               None:            -1, // thrift enum starts at 0, so we know this will not be used
+               UTF8:            ConvertedType(format.ConvertedType_UTF8),
+               Map:             ConvertedType(format.ConvertedType_MAP),
+               MapKeyValue:     ConvertedType(format.ConvertedType_MAP_KEY_VALUE),
+               List:            ConvertedType(format.ConvertedType_LIST),
+               Enum:            ConvertedType(format.ConvertedType_ENUM),
+               Decimal:         ConvertedType(format.ConvertedType_DECIMAL),
+               Date:            ConvertedType(format.ConvertedType_DATE),
+               TimeMillis:      ConvertedType(format.ConvertedType_TIME_MILLIS),
+               TimeMicros:      ConvertedType(format.ConvertedType_TIME_MICROS),
+               TimestampMillis: ConvertedType(format.ConvertedType_TIMESTAMP_MILLIS),
+               TimestampMicros: ConvertedType(format.ConvertedType_TIMESTAMP_MICROS),
+               Uint8:           ConvertedType(format.ConvertedType_UINT_8),
+               Uint16:          ConvertedType(format.ConvertedType_UINT_16),
+               Uint32:          ConvertedType(format.ConvertedType_UINT_32),
+               Uint64:          ConvertedType(format.ConvertedType_UINT_64),
+               Int8:            ConvertedType(format.ConvertedType_INT_8),
+               Int16:           ConvertedType(format.ConvertedType_INT_16),
+               Int32:           ConvertedType(format.ConvertedType_INT_32),
+               Int64:           ConvertedType(format.ConvertedType_INT_64),
+               JSON:            ConvertedType(format.ConvertedType_JSON),
+               BSON:            ConvertedType(format.ConvertedType_BSON),
+               Interval:        ConvertedType(format.ConvertedType_INTERVAL),
+               NA:              24, // should always be the last values after Interval
+       }
+)
+
+func (p ConvertedType) String() string {
+       switch p {
+       case ConvertedTypes.None:
+               return "NONE"
+       case ConvertedTypes.NA:
+               return "UNKNOWN"
+       default:
+               return format.ConvertedType(p).String()
+       }
+}
+
+// ToLogicalType returns the correct LogicalType for the given ConvertedType, using the decimal
+// metadata provided to define the precision/scale if necessary
+func (p ConvertedType) ToLogicalType(convertedDecimal DecimalMetadata) LogicalType {
+       switch p {
+       case ConvertedTypes.UTF8:
+               return StringLogicalType{}
+       case ConvertedTypes.Map, ConvertedTypes.MapKeyValue:
+               return MapLogicalType{}
+       case ConvertedTypes.List:
+               return ListLogicalType{}
+       case ConvertedTypes.Enum:
+               return EnumLogicalType{}
+       case ConvertedTypes.Decimal:
+               return NewDecimalLogicalType(convertedDecimal.Precision, convertedDecimal.Scale)
+       case ConvertedTypes.Date:
+               return DateLogicalType{}
+       case ConvertedTypes.TimeMillis:
+               return NewTimeLogicalType(true, TimeUnitMillis)
+       case ConvertedTypes.TimeMicros:
+               return NewTimeLogicalType(true, TimeUnitMicros)
+       case ConvertedTypes.TimestampMillis:
+               t := NewTimestampLogicalType(true, TimeUnitMillis)
+               t.(*TimestampLogicalType).fromConverted = true
+               return t
+       case ConvertedTypes.TimestampMicros:
+               t := NewTimestampLogicalType(true, TimeUnitMicros)
+               t.(*TimestampLogicalType).fromConverted = true
+               return t
+       case ConvertedTypes.Interval:
+               return IntervalLogicalType{}
+       case ConvertedTypes.Int8:
+               return NewIntLogicalType(8, true)

Review comment:
       done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to