jorisvandenbossche commented on code in PR #36162:
URL: https://github.com/apache/arrow/pull/36162#discussion_r1246861245
##########
python/pyarrow/tests/test_convert_builtin.py:
##########
@@ -2363,3 +2363,141 @@ def test_array_from_pylist_offset_overflow():
assert isinstance(arr, pa.ChunkedArray)
assert len(arr) == 2**31
assert len(arr.chunks) > 1
+
+
+@parametrize_with_collections_types
[email protected](('data', 'scalar_data', 'value_type'), [
+ ([True, False, None], [pa.scalar(True), pa.scalar(False), None],
pa.bool_()),
+ (
+ [1, 2, None],
+ [pa.scalar(1), pa.scalar(2), pa.scalar(None, pa.int64())],
+ pa.int64()
+ ),
+ ([1, None, None], [pa.scalar(1), None, pa.scalar(None, pa.int64())],
pa.int64()),
+ ([None, None], [pa.scalar(None), pa.scalar(None)], pa.null()),
+ ([1., 2., None], [pa.scalar(1.), pa.scalar(2.), None], pa.float64()),
+ (
+ [None, datetime.date.today()],
+ [None, pa.scalar(datetime.date.today())],
+ pa.date32()
+ ),
+ (
+ [None, datetime.date.today()],
+ [None, pa.scalar(datetime.date.today(), pa.date64())],
+ pa.date64()
+ ),
+ (
+ [datetime.time(1, 1, 1), None],
+ [pa.scalar(datetime.time(1, 1, 1)), None],
+ pa.time64('us')
+ ),
+ (
+ [datetime.timedelta(seconds=10)],
+ [pa.scalar(datetime.timedelta(seconds=10))],
+ pa.duration('us')
+ ),
+ (
+ [None, datetime.datetime(2014, 1, 1)],
+ [None, pa.scalar(datetime.datetime(2014, 1, 1))],
+ pa.timestamp('us')
+ ),
+ (
+ [pa.MonthDayNano([1, -1, -10100])],
+ [pa.scalar(pa.MonthDayNano([1, -1, -10100]))],
+ pa.month_day_nano_interval()
+ ),
+ (["a", "b"], [pa.scalar("a"), pa.scalar("b")], pa.string()),
+ ([b"a", b"b"], [pa.scalar(b"a"), pa.scalar(b"b")], pa.binary()),
+ (
+ [b"a", b"b"],
+ [pa.scalar(b"a", pa.binary(1)), pa.scalar(b"b", pa.binary(1))],
+ pa.binary(1)
+ ),
+ ([[1, 2, 3]], [pa.scalar([1, 2, 3])], pa.list_(pa.int64())),
+ ([["a", "b"]], [pa.scalar(["a", "b"])], pa.list_(pa.string())),
+ (
+ [1, 2, None],
+ [pa.scalar(1, type=pa.int8()), pa.scalar(2, type=pa.int8()), None],
+ pa.int8()
+ ),
+ ([1, None], [pa.scalar(1.0, type=pa.int32()), None], pa.int32()),
+ (
+ ["aaa", "bbb"],
+ [pa.scalar("aaa", type=pa.binary(3)), pa.scalar("bbb",
type=pa.binary(3))],
+ pa.binary(3)),
+ ([b"a"], [pa.scalar("a", type=pa.large_binary())], pa.large_binary()),
+ (["a"], [pa.scalar("a", type=pa.large_string())], pa.large_string()),
+ (
+ ["a"],
+ [pa.scalar("a", type=pa.dictionary(pa.int64(), pa.string()))],
+ pa.dictionary(pa.int64(), pa.string())
+ ),
+ (
+ ["a", "b"],
+ [pa.scalar("a", pa.dictionary(pa.int64(), pa.string())),
+ pa.scalar("b", pa.dictionary(pa.int64(), pa.string()))],
+ pa.dictionary(pa.int64(), pa.string())
+ ),
+ (
+ [1],
+ [pa.scalar(1, type=pa.dictionary(pa.int64(), pa.int32()))],
+ pa.dictionary(pa.int64(), pa.int32())
+ ),
+ (
+ [(1, 2)],
+ [pa.scalar([('a', 1), ('b', 2)], type=pa.struct(
+ [('a', pa.int8()), ('b', pa.int8())]))],
+ pa.struct([('a', pa.int8()), ('b', pa.int8())])
+ ),
+ (
+ [(1, 'bar')],
+ [pa.scalar([('a', 1), ('b', 'bar')], type=pa.struct(
+ [('a', pa.int8()), ('b', pa.string())]))],
+ pa.struct([('a', pa.int8()), ('b', pa.string())])
+ )
+])
+def test_array_accepts_pyarrow_scalar(seq, data, scalar_data, value_type):
+ if type(seq(scalar_data)) == set:
+ pytest.skip("TODO: The elements in the set get reordered.")
Review Comment:
One more thing I forgot: we should maybe resolve this TODO comment. Either just
keep skipping the set case (and remove the TODO), or actually address it.
The actual behaviour is expected: the iteration order of a set is not
defined (and is not necessarily the order in which its elements were inserted),
so it is OK that this test doesn't pass out of the box. The question is whether
we want to change the assert specifically for this case, or just leave it as is
(keep the skip).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]