[
https://issues.apache.org/jira/browse/ARROW-1882?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16279247#comment-16279247
]
ASF GitHub Bot commented on ARROW-1882:
---------------------------------------
wesm commented on a change in pull request #1388: ARROW-1882: [C++] Reintroduce
DictionaryBuilder
URL: https://github.com/apache/arrow/pull/1388#discussion_r155085087
##########
File path: cpp/src/arrow/array-test.cc
##########
@@ -1619,6 +1619,353 @@ TEST_F(TestAdaptiveUIntBuilder, TestAppendVector) {
ASSERT_TRUE(expected_->Equals(result_));
}
+// ----------------------------------------------------------------------
+// Dictionary tests
+
+template <typename Type>
+class TestDictionaryBuilder : public TestBuilder {};
+
+typedef ::testing::Types<Int8Type, UInt8Type, Int16Type, UInt16Type, Int32Type,
+ UInt32Type, Int64Type, UInt64Type, FloatType,
DoubleType>
+ PrimitiveDictionaries;
+
+TYPED_TEST_CASE(TestDictionaryBuilder, PrimitiveDictionaries);
+
+TYPED_TEST(TestDictionaryBuilder, Basic) {
+ DictionaryBuilder<TypeParam> builder(default_memory_pool());
+ ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(1)));
+ ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(2)));
+ ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(1)));
+
+ std::shared_ptr<Array> result;
+ ASSERT_OK(builder.Finish(&result));
+
+ // Build expected data
+ NumericBuilder<TypeParam> dict_builder;
+ ASSERT_OK(dict_builder.Append(static_cast<typename TypeParam::c_type>(1)));
+ ASSERT_OK(dict_builder.Append(static_cast<typename TypeParam::c_type>(2)));
+ std::shared_ptr<Array> dict_array;
+ ASSERT_OK(dict_builder.Finish(&dict_array));
+ auto dtype = std::make_shared<DictionaryType>(int8(), dict_array);
+
+ Int8Builder int_builder;
+ ASSERT_OK(int_builder.Append(0));
+ ASSERT_OK(int_builder.Append(1));
+ ASSERT_OK(int_builder.Append(0));
+ std::shared_ptr<Array> int_array;
+ ASSERT_OK(int_builder.Finish(&int_array));
+
+ DictionaryArray expected(dtype, int_array);
+ ASSERT_TRUE(expected.Equals(result));
+}
+
+TYPED_TEST(TestDictionaryBuilder, ArrayConversion) {
+ NumericBuilder<TypeParam> builder;
+ // DictionaryBuilder<TypeParam> builder;
+ ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(1)));
+ ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(2)));
+ ASSERT_OK(builder.Append(static_cast<typename TypeParam::c_type>(1)));
+
+ std::shared_ptr<Array> intermediate_result;
+ ASSERT_OK(builder.Finish(&intermediate_result));
+ DictionaryBuilder<TypeParam> dictionary_builder(default_memory_pool());
+ ASSERT_OK(dictionary_builder.AppendArray(*intermediate_result));
Review comment:
We might consider removing these `AppendArray` methods in favor of the
kernel-based approach.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> [C++] Reintroduce DictionaryBuilder
> -----------------------------------
>
> Key: ARROW-1882
> URL: https://issues.apache.org/jira/browse/ARROW-1882
> Project: Apache Arrow
> Issue Type: Bug
> Components: C++
> Reporter: Uwe L. Korn
> Assignee: Uwe L. Korn
> Priority: Critical
> Labels: pull-request-available
> Fix For: 0.8.0
>
>
> We need the {{DictionaryBuilder}} to incrementally build Arrow Arrays of
> {{DictionaryType}}. The kernels only support en-bloc conversions of Arrays,
> which results in higher memory usage.
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)