Repository: arrow Updated Branches: refs/heads/master 2c3bd9311 -> 0637e05d5
ARROW-576: [C++] Complete file/stream implementation for union types Author: Wes McKinney <[email protected]> Closes #356 from wesm/ARROW-576 and squashes the following commits: e239ba1 [Wes McKinney] Fix miniconda links 12fde46 [Wes McKinney] Complete metadata roundtrip for unions Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/0637e05d Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/0637e05d Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/0637e05d Branch: refs/heads/master Commit: 0637e05d59f20363a9103ffad5712f981314c4df Parents: 2c3bd93 Author: Wes McKinney <[email protected]> Authored: Thu Mar 2 14:41:29 2017 -0500 Committer: Wes McKinney <[email protected]> Committed: Thu Mar 2 14:41:29 2017 -0500 ---------------------------------------------------------------------- ci/travis_install_conda.sh | 4 +- cpp/src/arrow/ipc/ipc-file-test.cc | 2 +- cpp/src/arrow/ipc/metadata-internal.cc | 101 ++++++++++++++++++---------- 3 files changed, 67 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/ci/travis_install_conda.sh ---------------------------------------------------------------------- diff --git a/ci/travis_install_conda.sh b/ci/travis_install_conda.sh index ffa017c..9c13b1b 100644 --- a/ci/travis_install_conda.sh +++ b/ci/travis_install_conda.sh @@ -15,9 +15,9 @@ set -e if [ $TRAVIS_OS_NAME == "linux" ]; then - MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh" + MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" else - MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda-latest-MacOSX-x86_64.sh" + MINICONDA_URL="https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh" fi wget -O miniconda.sh $MINICONDA_URL http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/cpp/src/arrow/ipc/ipc-file-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/ipc-file-test.cc b/cpp/src/arrow/ipc/ipc-file-test.cc index e58f2cf..0c95c8e 100644 --- a/cpp/src/arrow/ipc/ipc-file-test.cc +++ b/cpp/src/arrow/ipc/ipc-file-test.cc @@ -180,7 +180,7 @@ TEST_P(TestStreamFormat, RoundTrip) { #define BATCH_CASES() \ ::testing::Values(&MakeIntRecordBatch, &MakeListRecordBatch, &MakeNonNullRecordBatch, \ &MakeZeroLengthRecordBatch, &MakeDeeplyNestedList, &MakeStringTypesRecordBatch, \ - &MakeStruct, &MakeDictionary); + &MakeStruct, &MakeUnion, &MakeDictionary); INSTANTIATE_TEST_CASE_P(FileRoundTripTests, TestFileFormat, BATCH_CASES()); INSTANTIATE_TEST_CASE_P(StreamRoundTripTests, TestStreamFormat, BATCH_CASES()); http://git-wip-us.apache.org/repos/asf/arrow/blob/0637e05d/cpp/src/arrow/ipc/metadata-internal.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc index 1cc4a23..17a3a5f 100644 --- a/cpp/src/arrow/ipc/metadata-internal.cc +++ b/cpp/src/arrow/ipc/metadata-internal.cc @@ -78,43 +78,6 @@ static Status FloatFromFlatuffer( return Status::OK(); } -static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, - const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) { - switch (type) { - case flatbuf::Type_NONE: - return Status::Invalid("Type metadata cannot be none"); - case flatbuf::Type_Int: - return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out); - case flatbuf::Type_FloatingPoint: - return FloatFromFlatuffer( - static_cast<const flatbuf::FloatingPoint*>(type_data), out); - case flatbuf::Type_Binary: - *out = binary(); - return Status::OK(); - case flatbuf::Type_Utf8: - *out = utf8(); - return Status::OK(); - case flatbuf::Type_Bool: - *out = boolean(); - return Status::OK(); - case flatbuf::Type_Decimal: - case flatbuf::Type_Timestamp: - case flatbuf::Type_List: - if (children.size() != 1) { - return Status::Invalid("List must have exactly 1 child field"); - } - *out = std::make_shared<ListType>(children[0]); - return Status::OK(); - case flatbuf::Type_Struct_: - *out = std::make_shared<StructType>(children); - return Status::OK(); - case flatbuf::Type_Union: - return Status::NotImplemented("Type is not implemented"); - default: - return Status::Invalid("Unrecognized type"); - } -} - // Forward declaration static Status FieldToFlatbuffer(FBB& fbb, const std::shared_ptr<Field>& field, DictionaryMemo* dictionary_memo, FieldOffset* offset); @@ -153,6 +116,32 @@ static Status StructToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type return Status::OK(); } +// ---------------------------------------------------------------------- +// Union implementation + +static Status UnionFromFlatbuffer(const flatbuf::Union* union_data, + const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) { + UnionMode mode = union_data->mode() == flatbuf::UnionMode_Sparse ? UnionMode::SPARSE + : UnionMode::DENSE; + + std::vector<uint8_t> type_codes; + + const flatbuffers::Vector<int32_t>* fb_type_ids = union_data->typeIds(); + if (fb_type_ids == nullptr) { + for (uint8_t i = 0; i < children.size(); ++i) { + type_codes.push_back(i); + } + } else { + for (int32_t id : (*fb_type_ids)) { + // TODO(wesm): can these values exceed 255? + type_codes.push_back(static_cast<uint8_t>(id)); + } + } + + *out = union_(children, type_codes, mode); + return Status::OK(); +} + static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr<DataType>& type, std::vector<FieldOffset>* out_children, DictionaryMemo* dictionary_memo, Offset* offset) { @@ -181,6 +170,44 @@ static Status UnionToFlatBuffer(FBB& fbb, const std::shared_ptr<DataType>& type, *offset = IntToFlatbuffer(fbb, BIT_WIDTH, IS_SIGNED); \ break; +static Status TypeFromFlatbuffer(flatbuf::Type type, const void* type_data, + const std::vector<std::shared_ptr<Field>>& children, std::shared_ptr<DataType>* out) { + switch (type) { + case flatbuf::Type_NONE: + return Status::Invalid("Type metadata cannot be none"); + case flatbuf::Type_Int: + return IntFromFlatbuffer(static_cast<const flatbuf::Int*>(type_data), out); + case flatbuf::Type_FloatingPoint: + return FloatFromFlatuffer( + static_cast<const flatbuf::FloatingPoint*>(type_data), out); + case flatbuf::Type_Binary: + *out = binary(); + return Status::OK(); + case flatbuf::Type_Utf8: + *out = utf8(); + return Status::OK(); + case flatbuf::Type_Bool: + *out = boolean(); + return Status::OK(); + case flatbuf::Type_Decimal: + case flatbuf::Type_Timestamp: + case flatbuf::Type_List: + if (children.size() != 1) { + return Status::Invalid("List must have exactly 1 child field"); + } + *out = std::make_shared<ListType>(children[0]); + return Status::OK(); + case flatbuf::Type_Struct_: + *out = std::make_shared<StructType>(children); + return Status::OK(); + case flatbuf::Type_Union: + return UnionFromFlatbuffer( + static_cast<const flatbuf::Union*>(type_data), children, out); + default: + return Status::Invalid("Unrecognized type"); + } +} + // TODO(wesm): Convert this to visitor pattern static Status TypeToFlatbuffer(FBB& fbb, const std::shared_ptr<DataType>& type, std::vector<FieldOffset>* children, std::vector<VectorLayoutOffset>* layout,
