amol- commented on a change in pull request #10101:
URL: https://github.com/apache/arrow/pull/10101#discussion_r621178469
##########
File path: python/pyarrow/tests/test_array.py
##########
@@ -688,6 +688,69 @@ def test_dictionary_from_numpy():
assert d2[i].as_py() == dictionary[indices[i]]
+def test_dictionary_to_numpy():
+ expected = pa.array(
+ ["foo", "bar", None, "foo"]
+ ).to_numpy(zero_copy_only=False)
+ a = pa.DictionaryArray.from_arrays(
+ pa.array([0, 1, None, 0]),
+ pa.array(['foo', 'bar'])
+ )
+ assert (a.to_numpy(zero_copy_only=False) == expected).all()
+
+ with pytest.raises(pa.ArrowInvalid):
+ # If this would be changed to no longer raise in the future,
+ # ensure to test the actual result because, currently, to_numpy takes
+ # for granted that when zero_copy_only=True there will be no nulls
+ # (it's the decoding of the DictionaryArray that handles the nulls and
+ # this is only activated with zero_copy_only=False)
+ a.to_numpy(zero_copy_only=True)
+
+ anonulls = pa.DictionaryArray.from_arrays(
+ pa.array([0, 1, 1, 0]),
+ pa.array(['foo', 'bar'])
+ )
+ expected = pa.array(
+ ["foo", "bar", "bar", "foo"]
+ ).to_numpy(zero_copy_only=False)
+ assert (anonulls.to_numpy(zero_copy_only=False) == expected).all()
+
+ with pytest.raises(pa.ArrowInvalid):
+ anonulls.to_numpy(zero_copy_only=True)
+
+ afloat = pa.DictionaryArray.from_arrays(
+ pa.array([0, 1, 1, 0]),
+ pa.array([13.7, 11.0])
+ )
+ expected = pa.array([13.7, 11.0, 11.0, 13.7]).to_numpy()
+ assert (afloat.to_numpy(zero_copy_only=True) == expected).all()
+ assert (afloat.to_numpy(zero_copy_only=False) == expected).all()
+
+ afloat2 = pa.DictionaryArray.from_arrays(
+ pa.array([0, 1, None, 0]),
+ pa.array([13.7, 11.0])
+ )
+ expected = pa.array(
+ [13.7, 11.0, None, 13.7]
+ ).to_numpy(zero_copy_only=False)
+ assert np.array_equal(
+ afloat2.to_numpy(zero_copy_only=False),
+ expected,
+ equal_nan=True
+ )
+
+ aints = pa.DictionaryArray.from_arrays(
Review comment:
This was the test that revealed that using `NaN` to represent nulls
wasn't working (it only worked in `to_pandas`, not in `to_numpy`), because
in the case of `numpy` arrays of ints there is no way to store `np.NaN` in the array.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]