This is an automated email from the ASF dual-hosted git repository.
zeroshade pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new b2b2cbe ARROW-14296: [Go] Update generated flatbuf
b2b2cbe is described below
commit b2b2cbeb28a75212f0707f62a5f9a89be8cb180d
Author: Matthew Topol <[email protected]>
AuthorDate: Tue Oct 12 11:52:36 2021 -0400
ARROW-14296: [Go] Update generated flatbuf
Also updates the type ID enums accordingly, adding type IDs for types that are
not yet supported, to reduce the size of future changes.
Broken out from #11310 to shrink the size of the change to implement
MonthDayNano intervals in Go
Closes #11389 from zeroshade/arrow-14296-flatbuf
Lead-authored-by: Matthew Topol <[email protected]>
Co-authored-by: Matt Topol <[email protected]>
Signed-off-by: Matthew Topol <[email protected]>
---
go/arrow/array/array.go | 76 ++++++++-------
go/arrow/array/array_test.go | 11 ++-
go/arrow/array/builder.go | 14 ++-
go/arrow/array/interval_test.go | 8 +-
go/arrow/datatype.go | 43 ++++++--
go/arrow/datatype_fixedwidth.go | 8 +-
go/arrow/datatype_fixedwidth_test.go | 6 +-
go/arrow/internal/flatbuf/Date.go | 4 +-
go/arrow/internal/flatbuf/IntervalUnit.go | 15 +--
go/arrow/internal/flatbuf/Time.go | 17 +++-
go/arrow/internal/flatbuf/Timestamp.go | 157 ++++++++++++++++++++++--------
go/arrow/scalar/scalar.go | 42 +++++---
go/arrow/type_string.go | 32 +++---
13 files changed, 299 insertions(+), 134 deletions(-)
diff --git a/go/arrow/array/array.go b/go/arrow/array/array.go
index cc2a1a3..8679773 100644
--- a/go/arrow/array/array.go
+++ b/go/arrow/array/array.go
@@ -139,7 +139,7 @@ func (a *array) Offset() int {
type arrayConstructorFn func(*Data) Interface
var (
- makeArrayFn [32]arrayConstructorFn
+ makeArrayFn [64]arrayConstructorFn
)
func unsupportedArrayType(data *Data) Interface {
@@ -152,7 +152,7 @@ func invalidDataType(data *Data) Interface {
// MakeFromData constructs a strongly-typed array instance from generic Data.
func MakeFromData(data *Data) Interface {
- return makeArrayFn[byte(data.dtype.ID()&0x1f)](data)
+ return makeArrayFn[byte(data.dtype.ID()&0x3f)](data)
}
// NewSlice constructs a zero-copy slice of the array with the indicated
@@ -170,39 +170,47 @@ func NewSlice(arr Interface, i, j int64) Interface {
func init() {
makeArrayFn = [...]arrayConstructorFn{
- arrow.NULL: func(data *Data) Interface { return
NewNullData(data) },
- arrow.BOOL: func(data *Data) Interface { return
NewBooleanData(data) },
- arrow.UINT8: func(data *Data) Interface { return
NewUint8Data(data) },
- arrow.INT8: func(data *Data) Interface { return
NewInt8Data(data) },
- arrow.UINT16: func(data *Data) Interface { return
NewUint16Data(data) },
- arrow.INT16: func(data *Data) Interface { return
NewInt16Data(data) },
- arrow.UINT32: func(data *Data) Interface { return
NewUint32Data(data) },
- arrow.INT32: func(data *Data) Interface { return
NewInt32Data(data) },
- arrow.UINT64: func(data *Data) Interface { return
NewUint64Data(data) },
- arrow.INT64: func(data *Data) Interface { return
NewInt64Data(data) },
- arrow.FLOAT16: func(data *Data) Interface { return
NewFloat16Data(data) },
- arrow.FLOAT32: func(data *Data) Interface { return
NewFloat32Data(data) },
- arrow.FLOAT64: func(data *Data) Interface { return
NewFloat64Data(data) },
- arrow.STRING: func(data *Data) Interface { return
NewStringData(data) },
- arrow.BINARY: func(data *Data) Interface { return
NewBinaryData(data) },
- arrow.FIXED_SIZE_BINARY: func(data *Data) Interface { return
NewFixedSizeBinaryData(data) },
- arrow.DATE32: func(data *Data) Interface { return
NewDate32Data(data) },
- arrow.DATE64: func(data *Data) Interface { return
NewDate64Data(data) },
- arrow.TIMESTAMP: func(data *Data) Interface { return
NewTimestampData(data) },
- arrow.TIME32: func(data *Data) Interface { return
NewTime32Data(data) },
- arrow.TIME64: func(data *Data) Interface { return
NewTime64Data(data) },
- arrow.INTERVAL: func(data *Data) Interface { return
NewIntervalData(data) },
- arrow.DECIMAL: func(data *Data) Interface { return
NewDecimal128Data(data) },
- arrow.LIST: func(data *Data) Interface { return
NewListData(data) },
- arrow.STRUCT: func(data *Data) Interface { return
NewStructData(data) },
- arrow.UNION: unsupportedArrayType,
- arrow.DICTIONARY: unsupportedArrayType,
- arrow.MAP: func(data *Data) Interface { return
NewMapData(data) },
- arrow.EXTENSION: func(data *Data) Interface { return
NewExtensionData(data) },
- arrow.FIXED_SIZE_LIST: func(data *Data) Interface { return
NewFixedSizeListData(data) },
- arrow.DURATION: func(data *Data) Interface { return
NewDurationData(data) },
+ arrow.NULL: func(data *Data) Interface {
return NewNullData(data) },
+ arrow.BOOL: func(data *Data) Interface {
return NewBooleanData(data) },
+ arrow.UINT8: func(data *Data) Interface {
return NewUint8Data(data) },
+ arrow.INT8: func(data *Data) Interface {
return NewInt8Data(data) },
+ arrow.UINT16: func(data *Data) Interface {
return NewUint16Data(data) },
+ arrow.INT16: func(data *Data) Interface {
return NewInt16Data(data) },
+ arrow.UINT32: func(data *Data) Interface {
return NewUint32Data(data) },
+ arrow.INT32: func(data *Data) Interface {
return NewInt32Data(data) },
+ arrow.UINT64: func(data *Data) Interface {
return NewUint64Data(data) },
+ arrow.INT64: func(data *Data) Interface {
return NewInt64Data(data) },
+ arrow.FLOAT16: func(data *Data) Interface {
return NewFloat16Data(data) },
+ arrow.FLOAT32: func(data *Data) Interface {
return NewFloat32Data(data) },
+ arrow.FLOAT64: func(data *Data) Interface {
return NewFloat64Data(data) },
+ arrow.STRING: func(data *Data) Interface {
return NewStringData(data) },
+ arrow.BINARY: func(data *Data) Interface {
return NewBinaryData(data) },
+ arrow.FIXED_SIZE_BINARY: func(data *Data) Interface {
return NewFixedSizeBinaryData(data) },
+ arrow.DATE32: func(data *Data) Interface {
return NewDate32Data(data) },
+ arrow.DATE64: func(data *Data) Interface {
return NewDate64Data(data) },
+ arrow.TIMESTAMP: func(data *Data) Interface {
return NewTimestampData(data) },
+ arrow.TIME32: func(data *Data) Interface {
return NewTime32Data(data) },
+ arrow.TIME64: func(data *Data) Interface {
return NewTime64Data(data) },
+ arrow.INTERVAL_MONTHS: func(data *Data) Interface {
return NewMonthIntervalData(data) },
+ arrow.INTERVAL_DAY_TIME: func(data *Data) Interface {
return NewDayTimeIntervalData(data) },
+ arrow.DECIMAL128: func(data *Data) Interface {
return NewDecimal128Data(data) },
+ arrow.DECIMAL256: unsupportedArrayType,
+ arrow.LIST: func(data *Data) Interface {
return NewListData(data) },
+ arrow.STRUCT: func(data *Data) Interface {
return NewStructData(data) },
+ arrow.SPARSE_UNION: unsupportedArrayType,
+ arrow.DENSE_UNION: unsupportedArrayType,
+ arrow.DICTIONARY: unsupportedArrayType,
+ arrow.MAP: func(data *Data) Interface {
return NewMapData(data) },
+ arrow.EXTENSION: func(data *Data) Interface {
return NewExtensionData(data) },
+ arrow.FIXED_SIZE_LIST: func(data *Data) Interface {
return NewFixedSizeListData(data) },
+ arrow.DURATION: func(data *Data) Interface {
return NewDurationData(data) },
+ arrow.LARGE_STRING: unsupportedArrayType,
+ arrow.LARGE_BINARY: unsupportedArrayType,
+ arrow.LARGE_LIST: unsupportedArrayType,
+ arrow.INTERVAL_MONTH_DAY_NANO: unsupportedArrayType,
+ arrow.INTERVAL: func(data *Data) Interface {
return NewIntervalData(data) },
// invalid data types to fill out array size 2⁵-1
- 31: invalidDataType,
+ 63: invalidDataType,
}
}
diff --git a/go/arrow/array/array_test.go b/go/arrow/array/array_test.go
index da8ecd7..5d93245 100644
--- a/go/arrow/array/array_test.go
+++ b/go/arrow/array/array_test.go
@@ -68,7 +68,7 @@ func TestMakeFromData(t *testing.T) {
{name: "time64", d: &testDataType{arrow.TIME64}},
{name: "month_interval", d:
arrow.FixedWidthTypes.MonthInterval},
{name: "day_time_interval", d:
arrow.FixedWidthTypes.DayTimeInterval},
- {name: "decimal", d: &testDataType{arrow.DECIMAL}},
+ {name: "decimal", d: &testDataType{arrow.DECIMAL128}},
{name: "list", d: &testDataType{arrow.LIST}, child:
[]*array.Data{
array.NewData(&testDataType{arrow.INT64}, 0 /* length
*/, make([]*memory.Buffer, 2 /*null bitmap, values*/), nil /* childData */, 0
/* nulls */, 0 /* offset */),
@@ -98,12 +98,17 @@ func TestMakeFromData(t *testing.T) {
{name: "extension", d: types.NewUUIDType()},
// unsupported types
- {name: "union", d: &testDataType{arrow.UNION}, expPanic: true,
expError: "unsupported data type: UNION"},
+ {name: "sparse union", d: &testDataType{arrow.SPARSE_UNION},
expPanic: true, expError: "unsupported data type: SPARSE_UNION"},
+ {name: "dense union", d: &testDataType{arrow.DENSE_UNION},
expPanic: true, expError: "unsupported data type: DENSE_UNION"},
{name: "dictionary", d: &testDataType{arrow.DICTIONARY},
expPanic: true, expError: "unsupported data type: DICTIONARY"},
+ {name: "large string", d: &testDataType{arrow.LARGE_STRING},
expPanic: true, expError: "unsupported data type: LARGE_STRING"},
+ {name: "large binary", d: &testDataType{arrow.LARGE_BINARY},
expPanic: true, expError: "unsupported data type: LARGE_BINARY"},
+ {name: "large list", d: &testDataType{arrow.LARGE_LIST},
expPanic: true, expError: "unsupported data type: LARGE_LIST"},
+ {name: "decimal256", d: &testDataType{arrow.DECIMAL256},
expPanic: true, expError: "unsupported data type: DECIMAL256"},
// invalid types
{name: "invalid(-1)", d: &testDataType{arrow.Type(-1)},
expPanic: true, expError: "invalid data type: Type(-1)"},
- {name: "invalid(31)", d: &testDataType{arrow.Type(31)},
expPanic: true, expError: "invalid data type: Type(31)"},
+ {name: "invalid(63)", d: &testDataType{arrow.Type(63)},
expPanic: true, expError: "invalid data type: Type(63)"},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
diff --git a/go/arrow/array/builder.go b/go/arrow/array/builder.go
index dbaad11..5342c5f 100644
--- a/go/arrow/array/builder.go
+++ b/go/arrow/array/builder.go
@@ -267,18 +267,28 @@ func NewBuilder(mem memory.Allocator, dtype
arrow.DataType) Builder {
case *arrow.MonthIntervalType:
return NewMonthIntervalBuilder(mem)
}
- case arrow.DECIMAL:
+ case arrow.INTERVAL_MONTHS:
+ return NewMonthIntervalBuilder(mem)
+ case arrow.INTERVAL_DAY_TIME:
+ return NewDayTimeIntervalBuilder(mem)
+ case arrow.INTERVAL_MONTH_DAY_NANO:
+ case arrow.DECIMAL128:
if typ, ok := dtype.(*arrow.Decimal128Type); ok {
return NewDecimal128Builder(mem, typ)
}
+ case arrow.DECIMAL256:
case arrow.LIST:
typ := dtype.(*arrow.ListType)
return NewListBuilder(mem, typ.Elem())
case arrow.STRUCT:
typ := dtype.(*arrow.StructType)
return NewStructBuilder(mem, typ)
- case arrow.UNION:
+ case arrow.SPARSE_UNION:
+ case arrow.DENSE_UNION:
case arrow.DICTIONARY:
+ case arrow.LARGE_STRING:
+ case arrow.LARGE_BINARY:
+ case arrow.LARGE_LIST:
case arrow.MAP:
typ := dtype.(*arrow.MapType)
return NewMapBuilder(mem, typ.KeyType(), typ.ItemType(),
typ.KeysSorted)
diff --git a/go/arrow/array/interval_test.go b/go/arrow/array/interval_test.go
index bac0bb4..3e47881 100644
--- a/go/arrow/array/interval_test.go
+++ b/go/arrow/array/interval_test.go
@@ -83,8 +83,8 @@ func TestMonthIntervalArray(t *testing.T) {
sub := array.MakeFromData(arr.Data())
defer sub.Release()
- if sub.DataType().ID() != arrow.INTERVAL {
- t.Fatalf("invalid type: got=%q, want=interval",
sub.DataType().Name())
+ if sub.DataType().ID() != arrow.INTERVAL_MONTHS {
+ t.Fatalf("invalid type: got=%q, want=interval_months",
sub.DataType().Name())
}
if _, ok := sub.(*array.MonthInterval); !ok {
@@ -208,8 +208,8 @@ func TestDayTimeArray(t *testing.T) {
sub := array.MakeFromData(arr.Data())
defer sub.Release()
- if sub.DataType().ID() != arrow.INTERVAL {
- t.Fatalf("invalid type: got=%q, want=interval",
sub.DataType().Name())
+ if sub.DataType().ID() != arrow.INTERVAL_DAY_TIME {
+ t.Fatalf("invalid type: got=%q, want=interval_day_time",
sub.DataType().Name())
}
if _, ok := sub.(*array.DayTimeInterval); !ok {
diff --git a/go/arrow/datatype.go b/go/arrow/datatype.go
index 9386ebf..57cce34 100644
--- a/go/arrow/datatype.go
+++ b/go/arrow/datatype.go
@@ -95,12 +95,18 @@ const (
// nanoseconds since midnight
TIME64
- // INTERVAL is YEAR_MONTH or DAY_TIME interval in SQL style
- INTERVAL
+ // INTERVAL_MONTHS is YEAR_MONTH interval in SQL style
+ INTERVAL_MONTHS
+
+ // INTERVAL_DAY_TIME is DAY_TIME in SQL Style
+ INTERVAL_DAY_TIME
- // DECIMAL is a precision- and scale-based decimal type. Storage type
depends on the
+ // DECIMAL128 is a precision- and scale-based decimal type. Storage
type depends on the
// parameters.
- DECIMAL
+ DECIMAL128
+
+ // DECIMAL256 is a precision- and scale-based decimal type with a 256-bit
maximum width. Not yet implemented.
+ DECIMAL256
// LIST is a list of some logical data type
LIST
@@ -108,8 +114,11 @@ const (
// STRUCT of logical types
STRUCT
- // UNION of logical types
- UNION
+ // SPARSE_UNION of logical types. not yet implemented
+ SPARSE_UNION
+
+ // DENSE_UNION of logical types. not yet implemented
+ DENSE_UNION
// DICTIONARY aka Category type
DICTIONARY
@@ -126,6 +135,28 @@ const (
// Measure of elapsed time in either seconds, milliseconds, microseconds
// or nanoseconds.
DURATION
+
+ // like STRING, but with 64-bit offsets. not yet implemented
+ LARGE_STRING
+
+ // like BINARY but with 64-bit offsets, not yet implemented
+ LARGE_BINARY
+
+ // like LIST but with 64-bit offsets. not yet implemented
+ LARGE_LIST
+
+ // calendar interval with three fields
+ INTERVAL_MONTH_DAY_NANO
+
+ // INTERVAL could be any of the interval types, kept to avoid breaking
anyone
+ // after switching to individual type ids for the interval types that
were using
+ // it when calling MakeFromData or NewBuilder
+ //
+ // Deprecated: INTERVAL will be removed in the next major version release.
+ INTERVAL
+
+ // Alias to ensure we do not break any consumers
+ DECIMAL = DECIMAL128
)
// DataType is the representation of an Arrow type.
diff --git a/go/arrow/datatype_fixedwidth.go b/go/arrow/datatype_fixedwidth.go
index 6efc81d..b30ee63 100644
--- a/go/arrow/datatype_fixedwidth.go
+++ b/go/arrow/datatype_fixedwidth.go
@@ -167,10 +167,10 @@ type MonthInterval int32
// representing a number of months.
type MonthIntervalType struct{}
-func (*MonthIntervalType) ID() Type { return INTERVAL }
+func (*MonthIntervalType) ID() Type { return INTERVAL_MONTHS }
func (*MonthIntervalType) Name() string { return "month_interval" }
func (*MonthIntervalType) String() string { return "month_interval" }
-func (*MonthIntervalType) Fingerprint() string { return
typeIDFingerprint(INTERVAL) + "M" }
+func (*MonthIntervalType) Fingerprint() string { return
typeIDFingerprint(INTERVAL_MONTHS) + "M" }
// BitWidth returns the number of bits required to store a single element of
this data type in memory.
func (t *MonthIntervalType) BitWidth() int { return 32 }
@@ -185,10 +185,10 @@ type DayTimeInterval struct {
// representing a number of days and milliseconds (fraction of day).
type DayTimeIntervalType struct{}
-func (*DayTimeIntervalType) ID() Type { return INTERVAL }
+func (*DayTimeIntervalType) ID() Type { return INTERVAL_DAY_TIME }
func (*DayTimeIntervalType) Name() string { return "day_time_interval" }
func (*DayTimeIntervalType) String() string { return "day_time_interval" }
-func (*DayTimeIntervalType) Fingerprint() string { return
typeIDFingerprint(INTERVAL) + "d" }
+func (*DayTimeIntervalType) Fingerprint() string { return
typeIDFingerprint(INTERVAL_DAY_TIME) + "d" }
// BitWidth returns the number of bits required to store a single element of
this data type in memory.
func (t *DayTimeIntervalType) BitWidth() int { return 64 }
diff --git a/go/arrow/datatype_fixedwidth_test.go
b/go/arrow/datatype_fixedwidth_test.go
index 3349703..0b3aad9 100644
--- a/go/arrow/datatype_fixedwidth_test.go
+++ b/go/arrow/datatype_fixedwidth_test.go
@@ -57,7 +57,7 @@ func TestDecimal128Type(t *testing.T) {
t.Fatalf("invalid bitwidth: got=%d, want=%d",
got, want)
}
- if got, want := dt.ID(), arrow.DECIMAL; got != want {
+ if got, want := dt.ID(), arrow.DECIMAL128; got != want {
t.Fatalf("invalid type ID: got=%v, want=%v",
got, want)
}
@@ -268,7 +268,7 @@ func TestDayTimeIntervalType(t *testing.T) {
t.Fatalf("invalid type name: got=%q, want=%q", got, want)
}
- if got, want := dt.ID(), arrow.INTERVAL; got != want {
+ if got, want := dt.ID(), arrow.INTERVAL_DAY_TIME; got != want {
t.Fatalf("invalid type ID: got=%v, want=%v", got, want)
}
@@ -287,7 +287,7 @@ func TestMonthIntervalType(t *testing.T) {
t.Fatalf("invalid type name: got=%q, want=%q", got, want)
}
- if got, want := dt.ID(), arrow.INTERVAL; got != want {
+ if got, want := dt.ID(), arrow.INTERVAL_MONTHS; got != want {
t.Fatalf("invalid type ID: got=%v, want=%v", got, want)
}
diff --git a/go/arrow/internal/flatbuf/Date.go
b/go/arrow/internal/flatbuf/Date.go
index fc19132..32983ec 100644
--- a/go/arrow/internal/flatbuf/Date.go
+++ b/go/arrow/internal/flatbuf/Date.go
@@ -22,8 +22,8 @@ import (
flatbuffers "github.com/google/flatbuffers/go"
)
-/// Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
-/// epoch (1970-01-01), stored in either of two units:
+/// Date is either a 32-bit or 64-bit signed integer type representing an
+/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units:
///
/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no
/// leap seconds), where the values are evenly divisible by 86400000
diff --git a/go/arrow/internal/flatbuf/IntervalUnit.go
b/go/arrow/internal/flatbuf/IntervalUnit.go
index 5c6aeec..f3ed1ae 100644
--- a/go/arrow/internal/flatbuf/IntervalUnit.go
+++ b/go/arrow/internal/flatbuf/IntervalUnit.go
@@ -23,18 +23,21 @@ import "strconv"
type IntervalUnit int16
const (
- IntervalUnitYEAR_MONTH IntervalUnit = 0
- IntervalUnitDAY_TIME IntervalUnit = 1
+ IntervalUnitYEAR_MONTH IntervalUnit = 0
+ IntervalUnitDAY_TIME IntervalUnit = 1
+ IntervalUnitMONTH_DAY_NANO IntervalUnit = 2
)
var EnumNamesIntervalUnit = map[IntervalUnit]string{
- IntervalUnitYEAR_MONTH: "YEAR_MONTH",
- IntervalUnitDAY_TIME: "DAY_TIME",
+ IntervalUnitYEAR_MONTH: "YEAR_MONTH",
+ IntervalUnitDAY_TIME: "DAY_TIME",
+ IntervalUnitMONTH_DAY_NANO: "MONTH_DAY_NANO",
}
var EnumValuesIntervalUnit = map[string]IntervalUnit{
- "YEAR_MONTH": IntervalUnitYEAR_MONTH,
- "DAY_TIME": IntervalUnitDAY_TIME,
+ "YEAR_MONTH": IntervalUnitYEAR_MONTH,
+ "DAY_TIME": IntervalUnitDAY_TIME,
+ "MONTH_DAY_NANO": IntervalUnitMONTH_DAY_NANO,
}
func (v IntervalUnit) String() string {
diff --git a/go/arrow/internal/flatbuf/Time.go
b/go/arrow/internal/flatbuf/Time.go
index 07b80ee..2fb6e4c 100644
--- a/go/arrow/internal/flatbuf/Time.go
+++ b/go/arrow/internal/flatbuf/Time.go
@@ -22,9 +22,20 @@ import (
flatbuffers "github.com/google/flatbuffers/go"
)
-/// Time type. The physical storage type depends on the unit
-/// - SECOND and MILLISECOND: 32 bits
-/// - MICROSECOND and NANOSECOND: 64 bits
+/// Time is either a 32-bit or 64-bit signed integer type representing an
+/// elapsed time since midnight, stored in either of four units: seconds,
+/// milliseconds, microseconds or nanoseconds.
+///
+/// The integer `bitWidth` depends on the `unit` and must be one of the
following:
+/// * SECOND and MILLISECOND: 32 bits
+/// * MICROSECOND and NANOSECOND: 64 bits
+///
+/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds
+/// (exclusive), adjusted for the time unit (for example, up to 86400000
+/// exclusive for the MILLISECOND unit).
+/// This definition doesn't allow for leap seconds. Time values from
+/// measurements with leap seconds will need to be corrected when ingesting
+/// into Arrow (for example by replacing the value 86400 with 86399).
type Time struct {
_tab flatbuffers.Table
}
diff --git a/go/arrow/internal/flatbuf/Timestamp.go
b/go/arrow/internal/flatbuf/Timestamp.go
index 44f9440..f532114 100644
--- a/go/arrow/internal/flatbuf/Timestamp.go
+++ b/go/arrow/internal/flatbuf/Timestamp.go
@@ -22,12 +22,111 @@ import (
flatbuffers "github.com/google/flatbuffers/go"
)
-/// Time elapsed from the Unix epoch, 00:00:00.000 on 1 January 1970, excluding
-/// leap seconds, as a 64-bit integer. Note that UNIX time does not include
-/// leap seconds.
+/// Timestamp is a 64-bit signed integer representing an elapsed time since a
+/// fixed epoch, stored in either of four units: seconds, milliseconds,
+/// microseconds or nanoseconds, and is optionally annotated with a timezone.
///
-/// The Timestamp metadata supports both "time zone naive" and "time zone
-/// aware" timestamps. Read about the timezone attribute for more detail
+/// Timestamp values do not include any leap seconds (in other words, all
+/// days are considered 86400 seconds long).
+///
+/// Timestamps with a non-empty timezone
+/// ------------------------------------
+///
+/// If a Timestamp column has a non-empty timezone value, its epoch is
+/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone
+/// (the Unix epoch), regardless of the Timestamp's own timezone.
+///
+/// Therefore, timestamp values with a non-empty timezone correspond to
+/// physical points in time together with some additional information about
+/// how the data was obtained and/or how to display it (the timezone).
+///
+/// For example, the timestamp value 0 with the timezone string
"Europe/Paris"
+/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the
+/// application may prefer to display it as "January 1st 1970, 01h00" in
+/// the Europe/Paris timezone (which is the same physical point in time).
+///
+/// One consequence is that timestamp values with a non-empty timezone
+/// can be compared and ordered directly, since they all share the same
+/// well-known point of reference (the Unix epoch).
+///
+/// Timestamps with an unset / empty timezone
+/// -----------------------------------------
+///
+/// If a Timestamp column has no timezone value, its epoch is
+/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone.
+///
+/// Therefore, timestamp values without a timezone cannot be meaningfully
+/// interpreted as physical points in time, but only as calendar / clock
+/// indications ("wall clock time") in an unspecified timezone.
+///
+/// For example, the timestamp value 0 with an empty timezone string
+/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there
+/// is not enough information to interpret it as a well-defined physical
+/// point in time.
+///
+/// One consequence is that timestamp values without a timezone cannot
+/// be reliably compared or ordered, since they may have different points of
+/// reference. In particular, it is *not* possible to interpret an unset
+/// or empty timezone as the same as "UTC".
+///
+/// Conversion between timezones
+/// ----------------------------
+///
+/// If a Timestamp column has a non-empty timezone, changing the timezone
+/// to a different non-empty value is a metadata-only operation:
+/// the timestamp values need not change as their point of reference remains
+/// the same (the Unix epoch).
+///
+/// However, if a Timestamp column has no timezone value, changing it to a
+/// non-empty value requires thinking about the desired semantics.
+/// One possibility is to assume that the original timestamp values are
+/// relative to the epoch of the timezone being set; timestamp values should
+/// then be adjusted to the Unix epoch (for example, changing the timezone from
+/// empty to "Europe/Paris" would require converting the timestamp values
+/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is
+/// nevertheless correct).
+///
+/// Guidelines for encoding data from external libraries
+/// ----------------------------------------------------
+///
+/// Date & time libraries often have multiple different data types for temporal
+/// data. In order to ease interoperability between different implementations
the
+/// Arrow project has some recommendations for encoding these types into a
Timestamp
+/// column.
+///
+/// An "instant" represents a physical point in time that has no relevant
timezone
+/// (for example, astronomical data). To encode an instant, use a Timestamp
with
+/// the timezone string set to "UTC", and make sure the Timestamp values
+/// are relative to the UTC epoch (January 1st 1970, midnight).
+///
+/// A "zoned date-time" represents a physical point in time annotated with an
+/// informative timezone (for example, the timezone in which the data was
+/// recorded). To encode a zoned date-time, use a Timestamp with the timezone
+/// string set to the name of the timezone, and make sure the Timestamp values
+/// are relative to the UTC epoch (January 1st 1970, midnight).
+///
+/// (There is some ambiguity between an instant and a zoned date-time with the
+/// UTC timezone. Both of these are stored the same in Arrow. Typically,
+/// this distinction does not matter. If it does, then an application should
+/// use custom metadata or an extension type to distinguish between the two
cases.)
+///
+/// An "offset date-time" represents a physical point in time combined with an
+/// explicit offset from UTC. To encode an offset date-time, use a Timestamp
+/// with the timezone string set to the numeric timezone offset string
+/// (e.g. "+03:00"), and make sure the Timestamp values are relative to
+/// the UTC epoch (January 1st 1970, midnight).
+///
+/// A "naive date-time" (also called "local date-time" in some libraries)
+/// represents a wall clock time combined with a calendar date, but with
+/// no indication of how to map this information to a physical point in time.
+/// Naive date-times must be handled with care because of this missing
+/// information, and also because daylight saving time (DST) may make
+/// some values ambiguous or non-existent. A naive date-time may be
+/// stored as a struct with Date and Time fields. However, it may also be
+/// encoded into a Timestamp column with an empty timezone. The timestamp
+/// values should be computed "as if" the timezone of the date-time values
+/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would
+/// be encoded as timestamp value 0.
type Timestamp struct {
_tab flatbuffers.Table
}
@@ -60,26 +159,16 @@ func (rcv *Timestamp) MutateUnit(n TimeUnit) bool {
return rcv._tab.MutateInt16Slot(4, int16(n))
}
-/// The time zone is a string indicating the name of a time zone, one of:
+/// The timezone is an optional string indicating the name of a timezone,
+/// one of:
///
-/// * As used in the Olson time zone database (the "tz database" or
-/// "tzdata"), such as "America/New_York"
-/// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
+/// * As used in the Olson timezone database (the "tz database" or
+/// "tzdata"), such as "America/New_York".
+/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
+/// such as "+07:30".
///
/// Whether a timezone string is present indicates different semantics about
-/// the data:
-///
-/// * If the time zone is null or equal to an empty string, the data is "time
-/// zone naive" and shall be displayed *as is* to the user, not localized
-/// to the locale of the user. This data can be though of as UTC but
-/// without having "UTC" as the time zone, it is not considered to be
-/// localized to any time zone
-///
-/// * If the time zone is set to a valid value, values can be displayed as
-/// "localized" to that time zone, even though the underlying 64-bit
-/// integers are identical to the same data stored in UTC. Converting
-/// between time zones is a metadata-only operation and does not change the
-/// underlying values
+/// the data (see above).
func (rcv *Timestamp) Timezone() []byte {
o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
if o != 0 {
@@ -88,26 +177,16 @@ func (rcv *Timestamp) Timezone() []byte {
return nil
}
-/// The time zone is a string indicating the name of a time zone, one of:
+/// The timezone is an optional string indicating the name of a timezone,
+/// one of:
///
-/// * As used in the Olson time zone database (the "tz database" or
-/// "tzdata"), such as "America/New_York"
-/// * An absolute time zone offset of the form +XX:XX or -XX:XX, such as +07:30
+/// * As used in the Olson timezone database (the "tz database" or
+/// "tzdata"), such as "America/New_York".
+/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX",
+/// such as "+07:30".
///
/// Whether a timezone string is present indicates different semantics about
-/// the data:
-///
-/// * If the time zone is null or equal to an empty string, the data is "time
-/// zone naive" and shall be displayed *as is* to the user, not localized
-/// to the locale of the user. This data can be though of as UTC but
-/// without having "UTC" as the time zone, it is not considered to be
-/// localized to any time zone
-///
-/// * If the time zone is set to a valid value, values can be displayed as
-/// "localized" to that time zone, even though the underlying 64-bit
-/// integers are identical to the same data stored in UTC. Converting
-/// between time zones is a metadata-only operation and does not change the
-/// underlying values
+/// the data (see above).
func TimestampStart(builder *flatbuffers.Builder) {
builder.StartObject(2)
}
diff --git a/go/arrow/scalar/scalar.go b/go/arrow/scalar/scalar.go
index 6d6ab95..1662081 100644
--- a/go/arrow/scalar/scalar.go
+++ b/go/arrow/scalar/scalar.go
@@ -389,7 +389,7 @@ func convertToNumeric(v reflect.Value, to reflect.Type, fn
reflect.Value) Scalar
// MakeNullScalar creates a scalar value of the desired type representing a
null value
func MakeNullScalar(dt arrow.DataType) Scalar {
- return makeNullFn[byte(dt.ID()&0x1f)](dt)
+ return makeNullFn[byte(dt.ID()&0x3f)](dt)
}
func unsupportedScalarType(dt arrow.DataType) Scalar {
@@ -402,7 +402,7 @@ func invalidScalarType(dt arrow.DataType) Scalar {
type scalarMakeNullFn func(arrow.DataType) Scalar
-var makeNullFn [32]scalarMakeNullFn
+var makeNullFn [64]scalarMakeNullFn
func init() {
makeNullFn = [...]scalarMakeNullFn{
@@ -433,18 +433,26 @@ func init() {
}
return &DayTimeInterval{scalar: scalar{dt, false}}
},
- arrow.DECIMAL: func(dt arrow.DataType) Scalar { return
&Decimal128{scalar: scalar{dt, false}} },
- arrow.LIST: func(dt arrow.DataType) Scalar { return
&List{scalar: scalar{dt, false}} },
- arrow.STRUCT: func(dt arrow.DataType) Scalar { return
&Struct{scalar: scalar{dt, false}} },
- arrow.UNION: unsupportedScalarType,
- arrow.DICTIONARY: unsupportedScalarType,
- arrow.MAP: func(dt arrow.DataType) Scalar { return
&Map{&List{scalar: scalar{dt, false}}} },
- arrow.EXTENSION: func(dt arrow.DataType) Scalar { return
&Extension{scalar: scalar{dt, false}} },
- arrow.FIXED_SIZE_LIST: func(dt arrow.DataType) Scalar { return
&FixedSizeList{&List{scalar: scalar{dt, false}}} },
- arrow.DURATION: func(dt arrow.DataType) Scalar { return
&Duration{scalar: scalar{dt, false}} },
+ arrow.INTERVAL_MONTHS: func(dt arrow.DataType) Scalar {
return &MonthInterval{scalar: scalar{dt, false}} },
+ arrow.INTERVAL_DAY_TIME: func(dt arrow.DataType) Scalar {
return &DayTimeInterval{scalar: scalar{dt, false}} },
+ arrow.DECIMAL128: func(dt arrow.DataType) Scalar {
return &Decimal128{scalar: scalar{dt, false}} },
+ arrow.LIST: func(dt arrow.DataType) Scalar {
return &List{scalar: scalar{dt, false}} },
+ arrow.STRUCT: func(dt arrow.DataType) Scalar {
return &Struct{scalar: scalar{dt, false}} },
+ arrow.SPARSE_UNION: unsupportedScalarType,
+ arrow.DENSE_UNION: unsupportedScalarType,
+ arrow.DICTIONARY: unsupportedScalarType,
+ arrow.LARGE_STRING: unsupportedScalarType,
+ arrow.LARGE_BINARY: unsupportedScalarType,
+ arrow.LARGE_LIST: unsupportedScalarType,
+ arrow.DECIMAL256: unsupportedScalarType,
+ arrow.MAP: func(dt arrow.DataType) Scalar {
return &Map{&List{scalar: scalar{dt, false}}} },
+ arrow.EXTENSION: func(dt arrow.DataType) Scalar {
return &Extension{scalar: scalar{dt, false}} },
+ arrow.FIXED_SIZE_LIST: func(dt arrow.DataType) Scalar {
return &FixedSizeList{&List{scalar: scalar{dt, false}}} },
+ arrow.DURATION: func(dt arrow.DataType) Scalar {
return &Duration{scalar: scalar{dt, false}} },
+ arrow.INTERVAL_MONTH_DAY_NANO: unsupportedScalarType,
// invalid data types to fill out array size 2⁵-1
- 31: invalidScalarType,
+ 63: invalidScalarType,
}
f := numericMap[arrow.FLOAT16]
@@ -727,10 +735,12 @@ func Hash(seed maphash.Seed, s Scalar) uint64 {
h.Reset()
}
- valueHash := func(v interface{}) {
+ valueHash := func(v interface{}) uint64 {
switch v := v.(type) {
case int32:
h.Write((*[4]byte)(unsafe.Pointer(&v))[:])
+ case int64:
+ h.Write((*[8]byte)(unsafe.Pointer(&v))[:])
case arrow.Date32:
binary.Write(&h, endian.Native, uint32(v))
case arrow.Time32:
@@ -753,6 +763,7 @@ func Hash(seed maphash.Seed, s Scalar) uint64 {
binary.Write(&h, endian.Native, uint64(v.HighBits()))
}
hash()
+ return out
}
h.Reset()
@@ -761,13 +772,12 @@ func Hash(seed maphash.Seed, s Scalar) uint64 {
case *Extension:
out ^= Hash(seed, s.Value)
case *DayTimeInterval:
- valueHash(s.Value.Days)
- valueHash(s.Value.Milliseconds)
+ return valueHash(s.Value.Days) & valueHash(s.Value.Milliseconds)
case PrimitiveScalar:
h.Write(s.Data())
hash()
case TemporalScalar:
- valueHash(s.value())
+ return valueHash(s.value())
case ListScalar:
array.Hash(&h, s.GetList().Data())
hash()
diff --git a/go/arrow/type_string.go b/go/arrow/type_string.go
index d81cb7b..3ba93f3 100644
--- a/go/arrow/type_string.go
+++ b/go/arrow/type_string.go
@@ -29,21 +29,29 @@ func _() {
_ = x[TIMESTAMP-18]
_ = x[TIME32-19]
_ = x[TIME64-20]
- _ = x[INTERVAL-21]
- _ = x[DECIMAL-22]
- _ = x[LIST-23]
- _ = x[STRUCT-24]
- _ = x[UNION-25]
- _ = x[DICTIONARY-26]
- _ = x[MAP-27]
- _ = x[EXTENSION-28]
- _ = x[FIXED_SIZE_LIST-29]
- _ = x[DURATION-30]
+ _ = x[INTERVAL_MONTHS-21]
+ _ = x[INTERVAL_DAY_TIME-22]
+ _ = x[DECIMAL128-23]
+ _ = x[DECIMAL256-24]
+ _ = x[LIST-25]
+ _ = x[STRUCT-26]
+ _ = x[SPARSE_UNION-27]
+ _ = x[DENSE_UNION-28]
+ _ = x[DICTIONARY-29]
+ _ = x[MAP-30]
+ _ = x[EXTENSION-31]
+ _ = x[FIXED_SIZE_LIST-32]
+ _ = x[DURATION-33]
+ _ = x[LARGE_STRING-34]
+ _ = x[LARGE_BINARY-35]
+ _ = x[LARGE_LIST-36]
+ _ = x[INTERVAL_MONTH_DAY_NANO-37]
+ _ = x[INTERVAL-38]
}
-const _Type_name =
"NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64FLOAT16FLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVALDECIMALLISTSTRUCTUNIONDICTIONARYMAPEXTENSIONFIXED_SIZE_LISTDURATION"
+const _Type_name =
"NULLBOOLUINT8INT8UINT16INT16UINT32INT32UINT64INT64FLOAT16FLOAT32FLOAT64STRINGBINARYFIXED_SIZE_BINARYDATE32DATE64TIMESTAMPTIME32TIME64INTERVAL_MONTHSINTERVAL_DAY_TIMEDECIMAL128DECIMAL256LISTSTRUCTSPARSE_UNIONDENSE_UNIONDICTIONARYMAPEXTENSIONFIXED_SIZE_LISTDURATIONLARGE_STRINGLARGE_BINARYLARGE_LISTINTERVAL_MONTH_DAY_NANOINTERVAL"
-var _Type_index = [...]uint8{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 57, 64,
71, 77, 83, 100, 106, 112, 121, 127, 133, 141, 148, 152, 158, 163, 173, 176,
185, 200, 208}
+var _Type_index = [...]uint16{0, 4, 8, 13, 17, 23, 28, 34, 39, 45, 50, 57, 64,
71, 77, 83, 100, 106, 112, 121, 127, 133, 148, 165, 175, 185, 189, 195, 207,
218, 228, 231, 240, 255, 263, 275, 287, 297, 320, 328}
func (i Type) String() string {
if i < 0 || i >= Type(len(_Type_index)-1) {