Repository: arrow Updated Branches: refs/heads/master d25286718 -> 96734efb7
ARROW-654: [C++] Serialize timezone in IPC metadata Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #416 from wesm/ARROW-654 and squashes the following commits: 001708e [Wes McKinney] Fix API change in Python bindings 3729cf9 [Wes McKinney] Serialize timezone in IPC metadata Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/96734efb Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/96734efb Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/96734efb Branch: refs/heads/master Commit: 96734efb73852f2d8372f72d7c56e8fb3ab4e516 Parents: d252867 Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Wed Mar 22 09:26:09 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Wed Mar 22 09:26:09 2017 -0400 ---------------------------------------------------------------------- cpp/src/arrow/ipc/feather-test.cc | 2 +- cpp/src/arrow/ipc/feather.cc | 2 +- cpp/src/arrow/ipc/metadata.cc | 16 ++++++++++++++-- cpp/src/arrow/ipc/test-common.h | 2 +- cpp/src/arrow/memory_pool.cc | 2 +- cpp/src/arrow/type-test.cc | 2 +- cpp/src/arrow/type.cc | 4 ++-- cpp/src/arrow/type.h | 4 ++-- python/pyarrow/includes/libarrow.pxd | 4 ++-- python/pyarrow/schema.pyx | 2 +- 10 files changed, 26 insertions(+), 14 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/feather-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/feather-test.cc b/cpp/src/arrow/ipc/feather-test.cc index 078c3e1..2513887 100644 --- a/cpp/src/arrow/ipc/feather-test.cc +++ b/cpp/src/arrow/ipc/feather-test.cc @@ -355,7 +355,7 @@ TEST_F(TestTableWriter, TimeTypes) { auto f0 = field("f0", date32()); auto f1 = field("f1", time(TimeUnit::MILLI)); auto f2 = field("f2", timestamp(TimeUnit::NANO)); - auto f3 = field("f3", timestamp("US/Los_Angeles", TimeUnit::SECOND)); + auto f3 = field("f3", timestamp(TimeUnit::SECOND, "US/Los_Angeles")); std::shared_ptr<Schema> schema(new Schema({f0, f1, f2, f3})); std::vector<int64_t> values_vec = {0, 1, 2, 3, 4, 5, 6}; http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/feather.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/feather.cc b/cpp/src/arrow/ipc/feather.cc index 72bbaa4..0dd9a81 100644 --- a/cpp/src/arrow/ipc/feather.cc +++ b/cpp/src/arrow/ipc/feather.cc @@ -287,7 +287,7 @@ class TableReader::TableReaderImpl { } else { tz = ""; } - *out = std::make_shared<TimestampType>(tz, unit); + *out = timestamp(unit, tz); } break; case fbs::TypeMetadata_DateMetadata: *out = date32(); http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/metadata.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc index a418d48..4dfda54 100644 --- a/cpp/src/arrow/ipc/metadata.cc +++ b/cpp/src/arrow/ipc/metadata.cc @@ -46,6 +46,7 @@ using LargeRecordBatchOffset = flatbuffers::Offset<flatbuf::LargeRecordBatch>; using RecordBatchOffset = flatbuffers::Offset<flatbuf::RecordBatch>; using VectorLayoutOffset = flatbuffers::Offset<arrow::flatbuf::VectorLayout>; using Offset = flatbuffers::Offset<void>; +using FBString = flatbuffers::Offset<flatbuffers::String>; static constexpr flatbuf::MetadataVersion kMetadataVersion = flatbuf::MetadataVersion_V2; @@ -250,7 +251,12 @@ static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, } case flatbuf::Type_Timestamp: { auto ts_type = static_cast<const flatbuf::Timestamp*>(type_data); - *out = timestamp(FromFlatbufferUnit(ts_type->unit())); + TimeUnit unit = FromFlatbufferUnit(ts_type->unit()); + if (ts_type->timezone() != 0 && ts_type->timezone()->Length() > 0) { + *out = timestamp(unit, ts_type->timezone()->str()); + } else { + *out = timestamp(unit); + } return Status::OK(); } case flatbuf::Type_Interval: @@ -364,7 +370,13 @@ static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type, case Type::TIMESTAMP: { const auto& ts_type = static_cast<const TimestampType&>(*type); *out_type = flatbuf::Type_Timestamp; - *offset = flatbuf::CreateTimestamp(fbb, ToFlatbufferUnit(ts_type.unit)).Union(); + + flatbuf::TimeUnit fb_unit = ToFlatbufferUnit(ts_type.unit); + FBString fb_timezone = 0; + if (ts_type.timezone.size() > 0) { + fb_timezone = fbb.CreateString(ts_type.timezone); + } + *offset = flatbuf::CreateTimestamp(fbb, fb_unit, fb_timezone).Union(); } break; case Type::LIST: *out_type = flatbuf::Type_List; http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/ipc/test-common.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h index ba203b0..330af0c 100644 --- a/cpp/src/arrow/ipc/test-common.h +++ b/cpp/src/arrow/ipc/test-common.h @@ -497,7 +497,7 @@ Status MakeDate32(std::shared_ptr<RecordBatch>* out) { Status MakeTimestamps(std::shared_ptr<RecordBatch>* out) { std::vector<bool> is_valid = {true, true, true, false, true, true, true}; auto f0 = field("f0", timestamp(TimeUnit::MILLI)); - auto f1 = field("f1", timestamp(TimeUnit::NANO)); + auto f1 = field("f1", timestamp(TimeUnit::NANO, "America/New_York")); auto f2 = field("f2", timestamp(TimeUnit::SECOND)); std::shared_ptr<Schema> schema(new Schema({f0, f1, f2})); http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/memory_pool.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index cf01a02..7992f22 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -19,10 +19,10 @@ #include <algorithm> #include <cstdlib> +#include <iostream> #include <mutex> #include <sstream> #include <stdlib.h> -#include <iostream> #include "arrow/status.h" #include "arrow/util/logging.h" http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/type-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type-test.cc b/cpp/src/arrow/type-test.cc index ddfff87..22aa7eb 100644 --- a/cpp/src/arrow/type-test.cc +++ b/cpp/src/arrow/type-test.cc @@ -209,7 +209,7 @@ TEST(TestTimestampType, Equals) { TEST(TestTimestampType, ToString) { auto t1 = timestamp(TimeUnit::MILLI); - auto t2 = timestamp("US/Eastern", TimeUnit::NANO); + auto t2 = timestamp(TimeUnit::NANO, "US/Eastern"); auto t3 = timestamp(TimeUnit::SECOND); auto t4 = timestamp(TimeUnit::MICRO); http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/type.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index ee0a89a..64070cb 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -244,8 +244,8 @@ std::shared_ptr<DataType> timestamp(TimeUnit unit) { return std::make_shared<TimestampType>(unit); } -std::shared_ptr<DataType> timestamp(const std::string& timezone, TimeUnit unit) { - return std::make_shared<TimestampType>(timezone, unit); +std::shared_ptr<DataType> timestamp(TimeUnit unit, const std::string& timezone) { + return std::make_shared<TimestampType>(unit, timezone); } std::shared_ptr<DataType> time(TimeUnit unit) { http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/cpp/src/arrow/type.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index adc3161..27b28d2 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -532,7 +532,7 @@ struct ARROW_EXPORT TimestampType : public FixedWidthType { explicit TimestampType(TimeUnit unit = TimeUnit::MILLI) : FixedWidthType(Type::TIMESTAMP), unit(unit) {} - explicit TimestampType(const std::string& timezone, TimeUnit unit = TimeUnit::MILLI) + explicit TimestampType(TimeUnit unit, const std::string& timezone) : FixedWidthType(Type::TIMESTAMP), unit(unit), timezone(timezone) {} TimestampType(const TimestampType& other) : TimestampType(other.unit) {} @@ -603,7 +603,7 @@ std::shared_ptr<DataType> ARROW_EXPORT list(const std::shared_ptr<DataType>& val std::shared_ptr<DataType> ARROW_EXPORT timestamp(TimeUnit unit); std::shared_ptr<DataType> ARROW_EXPORT timestamp( - const std::string& timezone, TimeUnit unit); + TimeUnit unit, const std::string& timezone); std::shared_ptr<DataType> ARROW_EXPORT time(TimeUnit unit); std::shared_ptr<DataType> ARROW_EXPORT struct_( http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/python/pyarrow/includes/libarrow.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 705fe6b..2d698d3 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -99,14 +99,14 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: shared_ptr[CArray] dictionary() shared_ptr[CDataType] timestamp(TimeUnit unit) - shared_ptr[CDataType] timestamp(const c_string& timezone, TimeUnit unit) + shared_ptr[CDataType] timestamp(TimeUnit unit, const c_string& timezone) cdef cppclass CMemoryPool" arrow::MemoryPool": int64_t bytes_allocated() cdef cppclass CLoggingMemoryPool" arrow::LoggingMemoryPool"(CMemoryPool): CLoggingMemoryPool(CMemoryPool*) - + cdef cppclass CBuffer" arrow::Buffer": uint8_t* data() int64_t size() http://git-wip-us.apache.org/repos/asf/arrow/blob/96734efb/python/pyarrow/schema.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/schema.pyx b/python/pyarrow/schema.pyx index 4bc938d..ee38144 100644 --- a/python/pyarrow/schema.pyx +++ b/python/pyarrow/schema.pyx @@ -314,7 +314,7 @@ def timestamp(unit_str, tz=None): tz = tz.zone c_timezone = tobytes(tz) - out.init(la.timestamp(c_timezone, unit)) + out.init(la.timestamp(unit, c_timezone)) return out