bkietz commented on code in PR #47531:
URL: https://github.com/apache/arrow/pull/47531#discussion_r2383448403
##########
cpp/src/arrow/stl_iterator_test.cc:
##########
@@ -545,5 +607,62 @@ TEST(ChunkedArrayIterator, ForEachIterator) {
ASSERT_EQ(values, expected);
}
+TEST(ChunkedArrayIterator, CustomValueAccessorDictionary) {
+ // Create multiple dictionary arrays with the same dictionary
+ auto dict = ArrayFromJSON(utf8(), R"(["red", "green", "blue", "yellow"])");
+
+ auto indices1 = ArrayFromJSON(int32(), "[0, 1, 2]");
+ auto indices2 = ArrayFromJSON(int32(), "[3, 2, null]");
+ auto indices3 = ArrayFromJSON(int32(), "[1, 0, 3, 2]");
+
+ auto dict_type = dictionary(int32(), utf8());
+ auto dict_array1 = std::make_shared<DictionaryArray>(dict_type, indices1,
dict);
+ auto dict_array2 = std::make_shared<DictionaryArray>(dict_type, indices2,
dict);
+ auto dict_array3 = std::make_shared<DictionaryArray>(dict_type, indices3,
dict);
+
+ // Create chunked array from dictionary arrays
+ auto chunked_array = std::make_shared<ChunkedArray>(
+ std::vector<std::shared_ptr<Array>>{dict_array1, dict_array2,
dict_array3},
+ dict_type);
+
+ // Use custom accessor to iterate over decoded values across chunks
+ auto it =
+ Begin<DictionaryType, DictionaryArray,
TestDictionaryValueAccessor>(*chunked_array);
Review Comment:
Alright, but even if you'd prefer not to modify `chunked_array.h`... I don't
think that the current PR is very usable since it requires so many redundant
template arguments. A non member function could still sidestep all the template
arguments:
```c++
auto accessor = [](const DictionaryArray& array, int64_t index) {
int64_t dict_index = array.GetValueIndex(index);
const auto& dict = checked_cast<const StringArray&>(*array.dictionary());
return dict->GetView(dict_index);
};
for (int i : Iterate(chunked_array, accessor)) {}
```
I think it'd be sufficient to write:
```c++
template <typename ValueAccessor>
auto Iterate(const ChunkedArray& chunked_array, ValueAccessor
value_accessor) {
using arrow::internal::call::traits;
static_assert(!call_traits::is_overloaded<ValueAccessor>::value,
"Cannot infer template arguments from overloaded
ValueAccessor");
using ArrayType = call_traits::argument_type<ValueAccessor, 0>;
return stl::ChunkedArrayRange<ArrayType, ValueAccessor>{&chunked_array,
value_accessor};
}
```
##########
cpp/src/arrow/stl_iterator.h:
##########
@@ -64,12 +64,13 @@ class ArrayIterator {
// Value access
value_type operator*() const {
assert(array_);
- return array_->IsNull(index_) ? value_type{} : array_->GetView(index_);
+ return array_->IsNull(index_) ? value_type{} : ValueAccessor{}(*array_,
index_);
Review Comment:
Please keep an instance of ValueAccessor as a data member here and
elsewhere; otherwise it must be default constructible and empty. It wouldn't
(for example) be able to reference a lookup table by reference unless that
table was global.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]