emkornfield commented on a change in pull request #10071:
URL: https://github.com/apache/arrow/pull/10071#discussion_r619882824



##########
File path: go/parquet/schema/converted_types.go
##########
@@ -0,0 +1,191 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package schema
+
+import (
+       format "github.com/apache/arrow/go/parquet/internal/gen-go/parquet"
+)
+
+// ConvertedType mirrors the ConvertedType enum from parquet.thrift, extended
+// with two extra values, None and NA, used when no converted type is set in
+// the metadata.
+type ConvertedType format.ConvertedType
+
+var (
+       // ConvertedTypes groups every ConvertedType constant in one struct value,
+       // so call sites read as ConvertedTypes.UTF8, ConvertedTypes.Map, and so on.
+       ConvertedTypes = struct {
+               None            ConvertedType // sentinel for "not set"; initialized to -1 below
+               UTF8            ConvertedType
+               Map             ConvertedType
+               MapKeyValue     ConvertedType
+               List            ConvertedType
+               Enum            ConvertedType
+               Decimal         ConvertedType
+               Date            ConvertedType
+               TimeMillis      ConvertedType
+               TimeMicros      ConvertedType
+               TimestampMillis ConvertedType
+               TimestampMicros ConvertedType
+               Uint8           ConvertedType
+               Uint16          ConvertedType
+               Uint32          ConvertedType
+               Uint64          ConvertedType
+               Int8            ConvertedType
+               Int16           ConvertedType
+               Int32           ConvertedType
+               Int64           ConvertedType
+               JSON            ConvertedType
+               BSON            ConvertedType
+               Interval        ConvertedType
+               NA              ConvertedType // extra non-thrift value; initialized to 24 below
+       }{
+               None:            -1, // thrift enum starts at 0, so we know 
this will not be used
+               UTF8:            ConvertedType(format.ConvertedType_UTF8),
+               Map:             ConvertedType(format.ConvertedType_MAP),
+               MapKeyValue:     
ConvertedType(format.ConvertedType_MAP_KEY_VALUE),
+               List:            ConvertedType(format.ConvertedType_LIST),
+               Enum:            ConvertedType(format.ConvertedType_ENUM),
+               Decimal:         ConvertedType(format.ConvertedType_DECIMAL),
+               Date:            ConvertedType(format.ConvertedType_DATE),
+               TimeMillis:      
ConvertedType(format.ConvertedType_TIME_MILLIS),
+               TimeMicros:      
ConvertedType(format.ConvertedType_TIME_MICROS),
+               TimestampMillis: 
ConvertedType(format.ConvertedType_TIMESTAMP_MILLIS),
+               TimestampMicros: 
ConvertedType(format.ConvertedType_TIMESTAMP_MICROS),
+               Uint8:           ConvertedType(format.ConvertedType_UINT_8),
+               Uint16:          ConvertedType(format.ConvertedType_UINT_16),
+               Uint32:          ConvertedType(format.ConvertedType_UINT_32),
+               Uint64:          ConvertedType(format.ConvertedType_UINT_64),
+               Int8:            ConvertedType(format.ConvertedType_INT_8),
+               Int16:           ConvertedType(format.ConvertedType_INT_16),
+               Int32:           ConvertedType(format.ConvertedType_INT_32),
+               Int64:           ConvertedType(format.ConvertedType_INT_64),
+               JSON:            ConvertedType(format.ConvertedType_JSON),
+               BSON:            ConvertedType(format.ConvertedType_BSON),
+               Interval:        ConvertedType(format.ConvertedType_INTERVAL),
+               NA:              24, // should always be the last values after 
Interval
+       }
+)
+
+func (p ConvertedType) String() string { // String gives the display name for p.
+       switch p {
+       case ConvertedTypes.None: // sentinel added by this package, not a thrift value
+               return "NONE"
+       case ConvertedTypes.NA: // extra value defined by this package
+               return "UNKNOWN"
+       default: // everything else defers to the generated thrift enum's String
+               return format.ConvertedType(p).String()
+       }
+}
+
+// ToLogicalType returns the correct LogicalType for the given ConvertedType, 
using the decimal
+// metadata provided to define the precision/scale if necessary
+func (p ConvertedType) ToLogicalType(convertedDecimal DecimalMetadata) 
LogicalType {
+       switch p {
+       case ConvertedTypes.UTF8:
+               return StringLogicalType{}
+       case ConvertedTypes.Map, ConvertedTypes.MapKeyValue: // both map variants yield the same logical type
+               return MapLogicalType{}
+       case ConvertedTypes.List:
+               return ListLogicalType{}
+       case ConvertedTypes.Enum:
+               return EnumLogicalType{}
+       case ConvertedTypes.Decimal: // the only case that reads convertedDecimal
+               return NewDecimalLogicalType(convertedDecimal.Precision, 
convertedDecimal.Scale)
+       case ConvertedTypes.Date:
+               return DateLogicalType{}
+       case ConvertedTypes.TimeMillis: // NOTE(review): first argument is presumably isAdjustedToUTC - confirm against NewTimeLogicalType
+               return NewTimeLogicalType(true, TimeUnitMillis)
+       case ConvertedTypes.TimeMicros:
+               return NewTimeLogicalType(true, TimeUnitMicros)
+       case ConvertedTypes.TimestampMillis:
+               t := NewTimestampLogicalType(true, TimeUnitMillis)
+               t.(*TimestampLogicalType).fromConverted = true // records that this type came from a ConvertedType; the flag is consumed outside this function
+               return t
+       case ConvertedTypes.TimestampMicros:
+               t := NewTimestampLogicalType(true, TimeUnitMicros)
+               t.(*TimestampLogicalType).fromConverted = true // same marking as the millisecond case above
+               return t
+       case ConvertedTypes.Interval:
+               return IntervalLogicalType{}
+       case ConvertedTypes.Int8: // Int* cases pass true and Uint* cases pass false, so the arguments appear to be (bit width, signed)
+               return NewIntLogicalType(8, true)
+       case ConvertedTypes.Int16:
+               return NewIntLogicalType(16, true)
+       case ConvertedTypes.Int32:
+               return NewIntLogicalType(32, true)
+       case ConvertedTypes.Int64:
+               return NewIntLogicalType(64, true)
+       case ConvertedTypes.Uint8:
+               return NewIntLogicalType(8, false)
+       case ConvertedTypes.Uint16:
+               return NewIntLogicalType(16, false)
+       case ConvertedTypes.Uint32:
+               return NewIntLogicalType(32, false)
+       case ConvertedTypes.Uint64:
+               return NewIntLogicalType(64, false)
+       case ConvertedTypes.JSON:
+               return JSONLogicalType{}
+       case ConvertedTypes.BSON:
+               return BSONLogicalType{}
+       case ConvertedTypes.None: // "not set" is distinct from "unknown": it gets NoLogicalType
+               return NoLogicalType{}
+       case ConvertedTypes.NA: // listed explicitly for readability; shares the default result
+               fallthrough
+       default: // NA and any unrecognized value map to UnknownLogicalType
+               return UnknownLogicalType{}
+       }
+}
+
+// GetSortOrder defaults to the sort order based on the physical type if 
convert
+// is ConvertedTypes.None, otherwise determines the sort order by the 
converted type.
+func GetSortOrder(convert ConvertedType, primitive format.Type) SortOrder {
+       if convert == ConvertedTypes.None {
+               return DefaultSortOrder(primitive)
+       }
+       switch convert {
+       case ConvertedTypes.Int8,
+               ConvertedTypes.Int16,
+               ConvertedTypes.Int32,
+               ConvertedTypes.Int64,
+               ConvertedTypes.Date,
+               ConvertedTypes.TimeMicros,
+               ConvertedTypes.TimeMillis,
+               ConvertedTypes.TimestampMicros,
+               ConvertedTypes.TimestampMillis:
+               return SortSIGNED
+       case ConvertedTypes.Uint8,
+               ConvertedTypes.Uint16,
+               ConvertedTypes.Uint32,
+               ConvertedTypes.Uint64,
+               ConvertedTypes.Enum,
+               ConvertedTypes.UTF8,
+               ConvertedTypes.BSON,
+               ConvertedTypes.JSON:
+               return SortUNSIGNED
+       case ConvertedTypes.Decimal,

Review comment:
       This doesn't look right for Decimal. I believe it should be signed, or 
it might vary based on the underlying physical type.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to