Repository: arrow Updated Branches: refs/heads/master 5d6c6ad6a -> 360942e61
ARROW-672: [Format] Add MetadataVersion::V3 for Arrow 0.3 As a matter of diligence, we increment the metadata version for Arrow 0.3 since we've changed the metadata format in various ways. Author: Wes McKinney <wes.mckin...@twosigma.com> Closes #488 from wesm/ARROW-672 and squashes the following commits: f39733e [Wes McKinney] Add C++ unit test for read/write MetadataVersion. Change MetadataVersion to C++11 enum class bb09ba2 [Wes McKinney] Add MetadataVersion::V3 for Arrow 0.3 Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/360942e6 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/360942e6 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/360942e6 Branch: refs/heads/master Commit: 360942e6171b301d5efb1686794239e3527828f3 Parents: 5d6c6ad Author: Wes McKinney <wes.mckin...@twosigma.com> Authored: Tue Apr 4 16:20:56 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Tue Apr 4 16:20:56 2017 -0400 ---------------------------------------------------------------------- c_glib/arrow-glib/ipc-metadata-version.cpp | 22 +++++++++++-------- c_glib/arrow-glib/ipc-metadata-version.h | 4 +++- c_glib/arrow-glib/ipc-metadata-version.hpp | 4 ++-- cpp/src/arrow/ipc/ipc-read-write-test.cc | 20 +++++++++++++++++ cpp/src/arrow/ipc/metadata.cc | 23 +++++++++++++++++++- cpp/src/arrow/ipc/metadata.h | 6 ++--- cpp/src/arrow/ipc/reader.cc | 11 +++++++--- cpp/src/arrow/ipc/reader.h | 2 +- format/Schema.fbs | 3 ++- .../arrow/vector/stream/MessageSerializer.java | 2 +- 10 files changed, 75 insertions(+), 22 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/c_glib/arrow-glib/ipc-metadata-version.cpp ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/ipc-metadata-version.cpp b/c_glib/arrow-glib/ipc-metadata-version.cpp index c5cc8d3..f591f29 100644 
--- a/c_glib/arrow-glib/ipc-metadata-version.cpp +++ b/c_glib/arrow-glib/ipc-metadata-version.cpp @@ -29,31 +29,35 @@ * @short_description: Metadata version mapgging between Arrow and arrow-glib * * #GArrowIPCMetadataVersion provides metadata versions corresponding - * to `arrow::ipc::MetadataVersion::type` values. + * to `arrow::ipc::MetadataVersion` values. */ GArrowIPCMetadataVersion -garrow_ipc_metadata_version_from_raw(arrow::ipc::MetadataVersion::type version) +garrow_ipc_metadata_version_from_raw(arrow::ipc::MetadataVersion version) { switch (version) { - case arrow::ipc::MetadataVersion::type::V1: + case arrow::ipc::MetadataVersion::V1: return GARROW_IPC_METADATA_VERSION_V1; - case arrow::ipc::MetadataVersion::type::V2: + case arrow::ipc::MetadataVersion::V2: return GARROW_IPC_METADATA_VERSION_V2; + case arrow::ipc::MetadataVersion::V3: + return GARROW_IPC_METADATA_VERSION_V3; default: - return GARROW_IPC_METADATA_VERSION_V2; + return GARROW_IPC_METADATA_VERSION_V3; } } -arrow::ipc::MetadataVersion::type +arrow::ipc::MetadataVersion garrow_ipc_metadata_version_to_raw(GArrowIPCMetadataVersion version) { switch (version) { case GARROW_IPC_METADATA_VERSION_V1: - return arrow::ipc::MetadataVersion::type::V1; + return arrow::ipc::MetadataVersion::V1; case GARROW_IPC_METADATA_VERSION_V2: - return arrow::ipc::MetadataVersion::type::V2; + return arrow::ipc::MetadataVersion::V2; + case GARROW_IPC_METADATA_VERSION_V3: + return arrow::ipc::MetadataVersion::V3; default: - return arrow::ipc::MetadataVersion::type::V2; + return arrow::ipc::MetadataVersion::V3; } } http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/c_glib/arrow-glib/ipc-metadata-version.h ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/ipc-metadata-version.h b/c_glib/arrow-glib/ipc-metadata-version.h index ccfd52a..20defdb 100644 --- a/c_glib/arrow-glib/ipc-metadata-version.h +++ b/c_glib/arrow-glib/ipc-metadata-version.h @@ -27,13 +27,15 @@ 
G_BEGIN_DECLS * GArrowIPCMetadataVersion: * @GARROW_IPC_METADATA_VERSION_V1: Version 1. * @GARROW_IPC_METADATA_VERSION_V2: Version 2. + * @GARROW_IPC_METADATA_VERSION_V3: Version 3. * * They are corresponding to `arrow::ipc::MetadataVersion::type` * values. */ typedef enum { GARROW_IPC_METADATA_VERSION_V1, - GARROW_IPC_METADATA_VERSION_V2 + GARROW_IPC_METADATA_VERSION_V2, + GARROW_IPC_METADATA_VERSION_V3 } GArrowIPCMetadataVersion; G_END_DECLS http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/c_glib/arrow-glib/ipc-metadata-version.hpp ---------------------------------------------------------------------- diff --git a/c_glib/arrow-glib/ipc-metadata-version.hpp b/c_glib/arrow-glib/ipc-metadata-version.hpp index 2a7e8cf..229565f 100644 --- a/c_glib/arrow-glib/ipc-metadata-version.hpp +++ b/c_glib/arrow-glib/ipc-metadata-version.hpp @@ -23,5 +23,5 @@ #include <arrow-glib/ipc-metadata-version.h> -GArrowIPCMetadataVersion garrow_ipc_metadata_version_from_raw(arrow::ipc::MetadataVersion::type version); -arrow::ipc::MetadataVersion::type garrow_ipc_metadata_version_to_raw(GArrowIPCMetadataVersion version); +GArrowIPCMetadataVersion garrow_ipc_metadata_version_from_raw(arrow::ipc::MetadataVersion version); +arrow::ipc::MetadataVersion garrow_ipc_metadata_version_to_raw(GArrowIPCMetadataVersion version); http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/cpp/src/arrow/ipc/ipc-read-write-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/ipc-read-write-test.cc b/cpp/src/arrow/ipc/ipc-read-write-test.cc index 86ec770..6807296 100644 --- a/cpp/src/arrow/ipc/ipc-read-write-test.cc +++ b/cpp/src/arrow/ipc/ipc-read-write-test.cc @@ -211,6 +211,26 @@ TEST_P(TestIpcRoundTrip, RoundTrip) { CheckRoundtrip(*batch, 1 << 20); } +TEST_F(TestIpcRoundTrip, MetadataVersion) { + std::shared_ptr<RecordBatch> batch; + ASSERT_OK(MakeIntRecordBatch(&batch)); + + ASSERT_OK(io::MemoryMapFixture::InitMemoryMap(1 << 16, 
"test-metadata", &mmap_)); + + int32_t metadata_length; + int64_t body_length; + + const int64_t buffer_offset = 0; + + ASSERT_OK(WriteRecordBatch( + *batch, buffer_offset, mmap_.get(), &metadata_length, &body_length, pool_)); + + std::shared_ptr<Message> message; + ASSERT_OK(ReadMessage(0, metadata_length, mmap_.get(), &message)); + + ASSERT_EQ(MetadataVersion::V3, message->metadata_version()); +} + TEST_P(TestIpcRoundTrip, SliceRoundTrip) { std::shared_ptr<RecordBatch> batch; ASSERT_OK((*GetParam())(&batch)); // NOLINT clang-tidy gtest issue http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/cpp/src/arrow/ipc/metadata.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/metadata.cc b/cpp/src/arrow/ipc/metadata.cc index 5007f13..2ff25ee 100644 --- a/cpp/src/arrow/ipc/metadata.cc +++ b/cpp/src/arrow/ipc/metadata.cc @@ -50,7 +50,7 @@ using VectorLayoutOffset = flatbuffers::Offset<arrow::flatbuf::VectorLayout>; using Offset = flatbuffers::Offset<void>; using FBString = flatbuffers::Offset<flatbuffers::String>; -static constexpr flatbuf::MetadataVersion kMetadataVersion = flatbuf::MetadataVersion_V2; +static constexpr flatbuf::MetadataVersion kMetadataVersion = flatbuf::MetadataVersion_V3; static Status IntFromFlatbuffer( const flatbuf::Int* int_data, std::shared_ptr<DataType>* out) { @@ -826,6 +826,23 @@ class Message::MessageImpl { } } + MetadataVersion version() const { + switch (message_->version()) { + case flatbuf::MetadataVersion_V1: + // Arrow 0.1 + return MetadataVersion::V1; + case flatbuf::MetadataVersion_V2: + // Arrow 0.2 + return MetadataVersion::V2; + case flatbuf::MetadataVersion_V3: + // Arrow 0.3 + return MetadataVersion::V3; + // Add cases as other versions become available + default: + return MetadataVersion::V3; + } + } + const void* header() const { return message_->header(); } int64_t body_length() const { return message_->bodyLength(); } @@ -856,6 +873,10 @@ Message::Type 
Message::type() const { return impl_->type(); } +MetadataVersion Message::metadata_version() const { + return impl_->version(); +} + int64_t Message::body_length() const { return impl_->body_length(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/cpp/src/arrow/ipc/metadata.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/metadata.h b/cpp/src/arrow/ipc/metadata.h index 451a76d..b042882 100644 --- a/cpp/src/arrow/ipc/metadata.h +++ b/cpp/src/arrow/ipc/metadata.h @@ -50,9 +50,7 @@ class RandomAccessFile; namespace ipc { -struct MetadataVersion { - enum type { V1, V2 }; -}; +enum class MetadataVersion : char { V1, V2, V3 }; static constexpr const char* kArrowMagicBytes = "ARROW1"; @@ -134,6 +132,8 @@ class ARROW_EXPORT Message { Type type() const; + MetadataVersion metadata_version() const; + const void* header() const; private: http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/cpp/src/arrow/ipc/reader.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 00ea20c..55f632f 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -332,15 +332,20 @@ class FileReader::FileReaderImpl { int num_record_batches() const { return footer_->recordBatches()->size(); } - MetadataVersion::type version() const { + MetadataVersion version() const { switch (footer_->version()) { case flatbuf::MetadataVersion_V1: + // Arrow 0.1 return MetadataVersion::V1; case flatbuf::MetadataVersion_V2: + // Arrow 0.2 return MetadataVersion::V2; + case flatbuf::MetadataVersion_V3: + // Arrow 0.3 + return MetadataVersion::V3; // Add cases as other versions become available default: - return MetadataVersion::V2; + return MetadataVersion::V3; } } @@ -454,7 +459,7 @@ int FileReader::num_record_batches() const { return impl_->num_record_batches(); } -MetadataVersion::type FileReader::version() const { 
+MetadataVersion FileReader::version() const { return impl_->version(); } http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/cpp/src/arrow/ipc/reader.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h index b62f052..1972446 100644 --- a/cpp/src/arrow/ipc/reader.h +++ b/cpp/src/arrow/ipc/reader.h @@ -91,7 +91,7 @@ class ARROW_EXPORT FileReader { int num_record_batches() const; - MetadataVersion::type version() const; + MetadataVersion version() const; // Read a record batch from the file. Does not copy memory if the input // source supports zero-copy. http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/format/Schema.fbs ---------------------------------------------------------------------- diff --git a/format/Schema.fbs b/format/Schema.fbs index 958f091..ca9c8e6 100644 --- a/format/Schema.fbs +++ b/format/Schema.fbs @@ -21,7 +21,8 @@ namespace org.apache.arrow.flatbuf; enum MetadataVersion:short { V1, - V2 + V2, + V3 } /// These are stored in the flatbuffer in the Type union below http://git-wip-us.apache.org/repos/asf/arrow/blob/360942e6/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java b/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java index f85fb51..ec7e0f2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/stream/MessageSerializer.java @@ -329,7 +329,7 @@ public class MessageSerializer { Message.startMessage(builder); Message.addHeaderType(builder, headerType); Message.addHeader(builder, headerOffset); - Message.addVersion(builder, MetadataVersion.V2); + Message.addVersion(builder, MetadataVersion.V3); Message.addBodyLength(builder, 
bodyLength); builder.finish(Message.endMessage(builder)); return builder.dataBuffer();