mikelui commented on code in PR #34730:
URL: https://github.com/apache/arrow/pull/34730#discussion_r1167563568
##########
python/pyarrow/src/arrow/python/arrow_to_pandas.cc:
##########
@@ -911,6 +877,97 @@ Status ConvertMap(PandasOptions options, const
ChunkedArray& data,
return Status::OK();
}
+Status ConvertMap(PandasOptions options, const ChunkedArray& data,
+ PyObject** out_values) {
+ // Get columns of underlying key/item arrays
+ std::vector<std::shared_ptr<Array>> key_arrays;
+ std::vector<std::shared_ptr<Array>> item_arrays;
+ for (int c = 0; c < data.num_chunks(); ++c) {
+ const auto& map_arr = checked_cast<const MapArray&>(*data.chunk(c));
+ key_arrays.emplace_back(map_arr.keys());
+ item_arrays.emplace_back(map_arr.items());
+ }
+
+ const auto& map_type = checked_cast<const MapType&>(*data.type());
+ auto key_type = map_type.key_type();
+ auto item_type = map_type.item_type();
+
+ // ARROW-6899: Convert dictionary-encoded children to dense instead of
+ // failing below. A more efficient conversion than this could be done later
+ if (key_type->id() == Type::DICTIONARY) {
+ auto dense_type = checked_cast<const
DictionaryType&>(*key_type).value_type();
+ RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &key_arrays));
+ key_type = dense_type;
+ }
+ if (item_type->id() == Type::DICTIONARY) {
+ auto dense_type = checked_cast<const
DictionaryType&>(*item_type).value_type();
+ RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &item_arrays));
+ item_type = dense_type;
+ }
+
+ // See notes in MakeInnerOptions.
+ options = MakeInnerOptions(std::move(options));
+ // Don't blindly convert because timestamps in lists are handled differently.
+ options.timestamp_as_object = true;
+
+ auto flat_keys = std::make_shared<ChunkedArray>(key_arrays, key_type);
+ auto flat_items = std::make_shared<ChunkedArray>(item_arrays, item_type);
+ OwnedRefNoGIL owned_numpy_keys;
+ RETURN_NOT_OK(
+ ConvertChunkedArrayToPandas(options, flat_keys, nullptr,
owned_numpy_keys.ref()));
+ OwnedRefNoGIL owned_numpy_items;
+ RETURN_NOT_OK(
+ ConvertChunkedArrayToPandas(options, flat_items, nullptr,
owned_numpy_items.ref()));
+ PyArrayObject* py_keys =
reinterpret_cast<PyArrayObject*>(owned_numpy_keys.obj());
+ PyArrayObject* py_items =
reinterpret_cast<PyArrayObject*>(owned_numpy_items.obj());
+
+ if (!options.maps_as_pydicts) {
+ // The default behavior to express an Arrow MAP as a list of [(key,
value), ...] pairs
+ OwnedRef list_item;
+ return ConvertMapHelper(
+ [&list_item](int64_t num_pairs) {
+ list_item.reset(PyList_New(num_pairs));
+ return CheckPyError();
+ },
+ [&list_item](int64_t idx, OwnedRef& key_value, OwnedRef& item_value) {
+ PyList_SET_ITEM(list_item.obj(), idx,
+ PyTuple_Pack(2, key_value.obj(), item_value.obj()));
+ return CheckPyError();
+ },
+ [&list_item]{ return list_item.detach(); },
+ data,
+ py_keys,
+ py_items,
+ item_arrays,
+ out_values);
+ } else {
+ // Use a native pydict
+ OwnedRef dict_item;
+ return ConvertMapHelper(
+ [&dict_item]([[maybe_unused]] int64_t) {
+ dict_item.reset(PyDict_New());
+ return CheckPyError();
+ },
+ [&dict_item]([[maybe_unused]] int64_t idx, OwnedRef& key_value,
OwnedRef& item_value) {
+ auto setitem_result =
+ PyDict_SetItem(dict_item.obj(), key_value.obj(),
item_value.obj());
+ RETURN_IF_PYERROR();
Review Comment:
This is already correctly handled and converted here:
https://github.com/kszucs/arrow/blob/master/cpp/src/arrow/python/common.cc#L144
However, I added an additional check and error message anyway, to give users
more direct information so that they don't have to search the web and dig
through the docs for the common case.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]