judahrand commented on code in PR #34234:
URL: https://github.com/apache/arrow/pull/34234#discussion_r1319949676
##########
python/pyarrow/tests/test_exec_plan.py:
##########
@@ -321,3 +321,77 @@ def test_join_extension_array_column():
result = ep._perform_join(
"left outer", t1, ["colB"], t3, ["colC"])
assert result["colB"] == pa.chunked_array(ext_array)
+
+
+@pytest.mark.parametrize("tolerance,expected", [
+ (
+ 1,
+ {
+ "colA": [1, 1, 5, 6, 7],
+ "col2": ["a", "b", "a", "b", "f"],
+ "colC": [1., None, None, None, None],
+ },
+ ),
+ (
+ 3,
+ {
+ "colA": [1, 1, 5, 6, 7],
+ "col2": ["a", "b", "a", "b", "f"],
+ "colC": [1., None, None, 3., None],
+ },
+ ),
+ (
+ -5,
+ {
+ "colA": [1, 1, 5, 6, 7],
+ "col2": ["a", "b", "a", "b", "f"],
+ "colC": [None, None, 1., None, None],
+ },
+ ),
+])
+@pytest.mark.parametrize("use_datasets", [False, True])
+def test_join_asof(tolerance, expected, use_datasets):
+ # Allocate table here instead of using parametrize
+ # this prevents having arrow allocated memory forever around.
+ expected = pa.table(expected)
+
+ t1 = pa.Table.from_pydict({
+ "colA": [1, 1, 5, 6, 7],
+ "col2": ["a", "b", "a", "b", "f"]
+ })
+
+ t2 = pa.Table.from_pydict({
+ "colB": [2, 9, 15],
+ "col3": ["a", "b", "g"],
+ "colC": [1., 3., 5.]
+ })
+
+ if use_datasets:
+ t1 = ds.dataset([t1])
+ t2 = ds.dataset([t2])
+
+ r = ep._perform_join_asof(t1, "colA", "col2", t2, "colB", "col3", tolerance)
+ r = r.combine_chunks()
+ r = r.sort_by("colA")
+ assert r == expected
+
+
+def test_table_join_asof_collisions():
+ t1 = pa.table({
+ "colA": [1, 2, 6],
+ "colB": [10, 20, 60],
+ "on": [1, 2, 3],
+ "colVals": ["a", "b", "f"]
+ })
+
+ t2 = pa.table({
+ "colB": [99, 20, 10],
+ "colVals": ["Z", "B", "A"],
+ "colUniq": [100, 200, 300],
+ "colA": [99, 2, 1],
+ "on": [2, 3, 4],
+ })
+
+ msg = "colVals present in both tables. AsofJoin does not support column collisions."
+ with pytest.raises(ValueError, match=msg):
+ ep._perform_join_asof(t1, "on", ["colA", "colB"], t2, "on", ["colA", "colB"], 1)
Review Comment:
https://github.com/apache/arrow/pull/34234/commits/102f965f83ba702997b74dd72626b78970cfb91a
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]