mrkn commented on a change in pull request #7898:
URL: https://github.com/apache/arrow/pull/7898#discussion_r478143508
##########
File path: cpp/src/arrow/array/array_dict_test.cc
##########
@@ -904,6 +904,67 @@ TEST(TestDecimalDictionaryBuilder, DoubleTableSize) {
ASSERT_TRUE(expected.Equals(result));
}
+TEST(TestNullDictionaryBuilder, Basic) {
+ // MakeBuilder
+ auto dict_type = dictionary(int8(), null());
+ std::unique_ptr<ArrayBuilder> boxed_builder;
+ ASSERT_OK(MakeBuilder(default_memory_pool(), dict_type, &boxed_builder));
+ auto& builder = checked_cast<DictionaryBuilder<NullType>&>(*boxed_builder);
+
+ ASSERT_OK(builder.AppendNull());
+ ASSERT_OK(builder.AppendNull());
+ ASSERT_OK(builder.AppendNull());
+ ASSERT_EQ(3, builder.length());
+ ASSERT_EQ(3, builder.null_count());
+
+ ASSERT_OK(builder.AppendNulls(4));
+ ASSERT_EQ(7, builder.length());
+ ASSERT_EQ(7, builder.null_count());
+
+ auto int_array = ArrayFromJSON(int8(), "[0, 1, 0, null]");
+ ASSERT_OK(builder.AppendArray(*int_array));
Review comment:
@pitrou I tried to directly insert `DCHECK` in `AppendArray`, but I
found it is difficult because `AppendArray` is defined in `builder_dict.h`.
That is a public header file` so we cannot use `DCHECK` there.
Instead of directly inserting `DCHECK`, the candidate patch is below.
```diff
diff --git a/cpp/src/arrow/array/builder_dict.cc
b/cpp/src/arrow/array/builder_dict.cc
index 54fd94856..dc239f268 100644
--- a/cpp/src/arrow/array/builder_dict.cc
+++ b/cpp/src/arrow/array/builder_dict.cc
@@ -189,5 +189,11 @@ Status DictionaryMemoTable::InsertValues(const Array&
array) {
int32_t DictionaryMemoTable::size() const { return impl_->size(); }
+#ifndef NDEBUG
+void CheckArrayType(const Type::type type_id, const Array& array, const
std::string& message) {
+ DCHECK_EQ(type_id, array.type_id()) << message;
+}
+#endif
+
} // namespace internal
} // namespace arrow
diff --git a/cpp/src/arrow/array/builder_dict.h
b/cpp/src/arrow/array/builder_dict.h
index f8c77a5b0..a0d02fd1b 100644
--- a/cpp/src/arrow/array/builder_dict.h
+++ b/cpp/src/arrow/array/builder_dict.h
@@ -93,6 +93,10 @@ class ARROW_EXPORT DictionaryMemoTable {
std::unique_ptr<DictionaryMemoTableImpl> impl_;
};
+#ifndef NDEBUG
+void CheckArrayType(const Type::type type_id, const Array& array, const
std::string& message);
+#endif
+
/// \brief Array builder for created encoded DictionaryArray from
/// dense array
///
@@ -248,6 +252,10 @@ class DictionaryBuilderBase : public ArrayBuilder {
const Array& array) {
using ArrayType = typename TypeTraits<T>::ArrayType;
+#ifndef NDEBUG
+ CheckArrayType(T::type_id, array, "Wrong value type of array to be
appended");
+#endif
+
const auto& concrete_array = static_cast<const ArrayType&>(array);
for (int64_t i = 0; i < array.length(); i++) {
if (array.IsNull(i)) {
@@ -406,6 +414,9 @@ class DictionaryBuilderBase<BuilderType, NullType> :
public ArrayBuilder {
/// \brief Append a whole dense array to the builder
Status AppendArray(const Array& array) {
+#ifndef NDEBUG
+ CheckArrayType(NullType::type_id, array, "Wrong value type of array to
be appended");
+#endif
for (int64_t i = 0; i < array.length(); i++) {
ARROW_RETURN_NOT_OK(AppendNull());
}
};
} // namespace internal
```
I don't think this approach is good. Can I follow this way?
By the way, I found that `AppendArray` for `DictionaryBuilderBase` of
`FixedSizedBinaryType` checks the array type at the beginning of the function
and return `Status::Invalid` when mismatching type types. I guess if this kind
of check is accepted for `FixedSizedBinaryType`, the same kind of check can be
put in the other `AppendArray` functions.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]