milesgranger commented on code in PR #14395:
URL: https://github.com/apache/arrow/pull/14395#discussion_r1011534635


##########
python/pyarrow/tests/test_compute.py:
##########
@@ -2929,3 +2930,90 @@ def test_cast_table_raises():
 
     with pytest.raises(pa.lib.ArrowInvalid):
         pc.cast(table, pa.int64())
+
+
+@pytest.mark.parametrize("start,stop,expected", (
+    (0, 1, [[1], [4], [6], None]),
+    (0, 2, [[1, 2], [4, 5], [6, None], None]),
+    (1, 2, [[2], [5], [None], None]),
+    (2, 4, [[3, None], [None, None], [None, None], None])
+))
+@pytest.mark.parametrize("value_type", (pa.string, pa.int16, pa.float64))
+@pytest.mark.parametrize("list_type", (pa.list_, pa.large_list, "fixed"))
+def test_list_slice_output_fixed(start, stop, expected, value_type, list_type):
+    if list_type == "fixed":
+        arr = pa.array([[1, 2, 3], [4, 5, None], [6, None, None], None],
+                       pa.list_(pa.int8(), 3)).cast(pa.list_(value_type(), 3))
+    else:
+        arr = pa.array([[1, 2, 3], [4, 5], [6], None],
+                       pa.list_(pa.int8())).cast(list_type(value_type()))
+    result = pc.list_slice(arr, start, stop)  # default is to return fixed size
+    pylist = result.cast(pa.list_(pa.int8(), stop-start)).to_pylist()
+    assert pylist == expected
+
+
+@pytest.mark.parametrize("start,stop", (
+    (0, 1,),
+    (0, 2,),
+    (1, 2,),
+    (2, 4,)
+))
+@pytest.mark.parametrize("value_type", (pa.string, pa.int16, pa.float64))
+@pytest.mark.parametrize("list_type", (pa.list_, pa.large_list, "fixed"))
+def test_list_slice_output_variable(start, stop, value_type, list_type):
+    if list_type == "fixed":
+        data = [[1, 2, 3], [4, 5, None], [6, None, None], None]
+        arr = pa.array(
+            data,
+            pa.list_(pa.int8(), 3)).cast(pa.list_(value_type(), 3))
+    else:
+        data = [[1, 2, 3], [4, 5], [6], None]
+        arr = pa.array(data,
+                       pa.list_(pa.int8())).cast(list_type(value_type()))
+
+    # Gets same list type (ListArray vs LargeList)
+    if list_type == "fixed":
+        list_type = pa.list_  # non fixed output type
+
+    result = pc.list_slice(arr, start, stop, return_fixed_size_list=False)
+    assert result.type == list_type(value_type())
+
+    pylist = result.cast(pa.list_(pa.int8())).to_pylist()
+
+    # Variable output slicing follows Python's slice semantics
+    expected = [d[start:stop] if d is not None else None for d in data]
+    assert pylist == expected
+
+
+def test_list_slice_bad_parameters():
+    arr = pa.array([[1]], pa.list_(pa.int8(), 1))
+    msg = r"`start`(.*) should be greater than 0 and smaller than `stop`(.*)"
+    with pytest.raises(pa.ArrowInvalid, match=msg):
+        pc.list_slice(arr, -1)  # negative start?
+    with pytest.raises(pa.ArrowInvalid, match=msg):
+        pc.list_slice(arr, 2, 1)  # start > stop?
+
+    # TODO: start==stop -> empty lists
+    with pytest.raises(pa.ArrowInvalid, match=msg):
+        pc.list_slice(arr, 0, 0)  # start == stop?
+
+    # TODO: support step in slicing
+    msg = "Setting `step` to anything other than 1 is not supported; got step=2"
+    with pytest.raises(NotImplementedError, match=msg):
+        pc.list_slice(arr, 0, 1, step=2)
+
+    # TODO: support stop == -1; slice to end
+    msg = "Setting `stop==-1` to signify slicing to end, not yet implemented."
+    with pytest.raises(NotImplementedError, match=msg):
+        pc.list_slice(arr, 0, -1)

Review Comment:
   >> Ah, now I see this…can we just implement it in Python?
   
   I suppose, but if we're going to have a TODO to support it in C++, is it 
worth waiting for that?
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to