This is an automated email from the ASF dual-hosted git repository.
raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 0d32975ee1 GH-47380: [Python] Apply maps_as_pydicts to Nested
MapScalar Values (#47454)
0d32975ee1 is described below
commit 0d32975ee1f1d36b3f53968019ff2e377d0c0057
Author: Johanna <[email protected]>
AuthorDate: Tue Oct 7 09:19:32 2025 +0200
GH-47380: [Python] Apply maps_as_pydicts to Nested MapScalar Values (#47454)
### Rationale for this change
Currently, the `maps_as_pydicts` parameter to `MapScalar.as_py` does not
work on nested maps. See below:
```
import pyarrow as pa
t = pa.struct([pa.field("x", pa.map_(pa.string(), pa.map_(pa.string(),
pa.int8())))])
v = {"x": {"a": {"1": 1}}}
s = pa.scalar(v, type=t)
print(s.as_py(maps_as_pydicts="strict"))
# {'x': {'a': [('1', 1)]}}
```
In the case above, I'd want to get the value `{'x': {'a': {'1': 1}}}`, so
that round trips work as expected.
### What changes are included in this PR?
Apply `maps_as_pydicts` to nested values in map types as well, and
update the relevant tests.
### Are these changes tested?
Yes
### Are there any user-facing changes?
Yes, just a user-facing fix.
* GitHub Issue: #47380
Lead-authored-by: Johanna <[email protected]>
Co-authored-by: zzkv <[email protected]>
Co-authored-by: Johanna <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
---
python/pyarrow/scalar.pxi | 7 +++++--
python/pyarrow/tests/test_scalars.py | 31 ++++++++++++++++++++++++++++++-
2 files changed, 35 insertions(+), 3 deletions(-)
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 5934a7aa8c..83cabcf447 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -1173,7 +1173,10 @@ cdef class MapScalar(ListScalar, Mapping):
if not maps_as_pydicts:
return list(self)
result_dict = {}
- for key, value in self:
+ if self.values is None:
+ return result_dict
+
+ for key, value in zip(self.keys(), self.values.field(self.type.item_field.name)):
if key in result_dict:
if maps_as_pydicts == "strict":
raise KeyError(
@@ -1183,7 +1186,7 @@ cdef class MapScalar(ListScalar, Mapping):
else:
warnings.warn(
f"Encountered key '{key}' which was already encountered.")
- result_dict[key] = value
+ result_dict[key] = value.as_py(maps_as_pydicts=maps_as_pydicts)
return result_dict
def keys(self):
diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py
index 0f62dd98f8..65f0c60813 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -956,7 +956,7 @@ def test_map_scalar_as_py_with_custom_field_name():
).as_py() == [("foo", "bar")]
-def test_nested_map_types_with_maps_as_pydicts():
+def test_map_types_with_maps_as_pydicts():
ty = pa.struct([
pa.field('x', pa.map_(pa.string(), pa.int8())),
pa.field('y', pa.list_(pa.map_(pa.string(), pa.int8()))),
@@ -966,3 +966,32 @@ def test_nested_map_types_with_maps_as_pydicts():
s = pa.scalar(v, type=ty)
assert s.as_py(maps_as_pydicts="strict") == v
+
+
+def test_nested_map_types_with_maps_as_pydicts():
+ ty = pa.struct(
+ [
+ pa.field('x', pa.map_(pa.string(), pa.map_(pa.string(), pa.int8()))),
+ pa.field(
+ 'y', pa.list_(pa.map_(pa.string(), pa.map_(pa.string(), pa.int8())))
+ ),
+ ]
+ )
+
+ v = {'x': {'a': {'1': 1}}, 'y': [{'b': {'2': 2}}, {'c': {'3': 3}}]}
+ s = pa.scalar(v, type=ty)
+
+ assert s.as_py(maps_as_pydicts="strict") == v
+
+
+def test_map_scalar_with_empty_values():
+ map_type = pa.struct(
+ [
+ pa.field('x', pa.map_(pa.string(), pa.string())),
+ ]
+ )
+
+ v = {'x': {}}
+ s = pa.scalar(v, type=map_type)
+
+ assert s.as_py(maps_as_pydicts="strict") == v