This is an automated email from the ASF dual-hosted git repository. raulcd pushed a commit to branch maint-16.x.x in repository https://gitbox.apache.org/repos/asf/arrow.git
commit b9d69946a3fa811564dcf6f7c73a1770b237f824 Author: Felipe Oliveira Carvalho <[email protected]> AuthorDate: Tue Apr 23 14:01:51 2024 -0300 GH-41353: [C++] Define bit_width and byte_width of ExtensionType in terms of the storage type (#41354) ### Rationale for this change Users and other classes within Arrow itself (e.g. array builders) expect extension types to behave like their underlying storage type. As it is now, `ExtensionType::bit_width()` is the default `DataType::bit_width()` implementation which returns `-1`. It should return the storage type's bit-width. ### What changes are included in this PR? Definition of `ExtensionType::bit_width/byte_width` functions. ### Are these changes tested? Tests added and confirmed to fail prior to these changes. ### Are there any user-facing changes? `ExtensionType` now defines `bit_width` and `byte_width` according to its storage type. * GitHub Issue: #41353 Authored-by: Felipe Oliveira Carvalho <[email protected]> Signed-off-by: Felipe Oliveira Carvalho <[email protected]> --- cpp/src/arrow/extension_type.h | 3 +++ cpp/src/arrow/extension_type_test.cc | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/cpp/src/arrow/extension_type.h b/cpp/src/arrow/extension_type.h index 0fd7216f18..b3f085198b 100644 --- a/cpp/src/arrow/extension_type.h +++ b/cpp/src/arrow/extension_type.h @@ -54,6 +54,9 @@ class ARROW_EXPORT ExtensionType : public DataType { std::string name() const override { return "extension"; } + int32_t byte_width() const override { return storage_type_->byte_width(); } + int bit_width() const override { return storage_type_->bit_width(); } + /// \brief Unique name of extension type used to identify type for /// serialization /// \return the string name of the extension diff --git a/cpp/src/arrow/extension_type_test.cc b/cpp/src/arrow/extension_type_test.cc index 31222d7480..f104c984a6 100644 --- a/cpp/src/arrow/extension_type_test.cc +++ b/cpp/src/arrow/extension_type_test.cc @@ -196,6 +196,8 @@ 
TEST_F(TestExtensionType, ExtensionTypeTest) { auto type = uuid(); ASSERT_EQ(type->id(), Type::EXTENSION); + ASSERT_EQ(type->bit_width(), 128); + ASSERT_EQ(type->byte_width(), 16); const auto& ext_type = static_cast<const ExtensionType&>(*type); std::string serialized = ext_type.Serialize(); @@ -204,6 +206,9 @@ TEST_F(TestExtensionType, ExtensionTypeTest) { ext_type.Deserialize(fixed_size_binary(16), serialized)); ASSERT_TRUE(deserialized->Equals(*type)); ASSERT_FALSE(deserialized->Equals(*fixed_size_binary(16))); + ASSERT_EQ(deserialized->id(), Type::EXTENSION); + ASSERT_EQ(deserialized->bit_width(), 128); + ASSERT_EQ(deserialized->byte_width(), 16); } auto RoundtripBatch = [](const std::shared_ptr<RecordBatch>& batch,
