judahrand commented on code in PR #34234:
URL: https://github.com/apache/arrow/pull/34234#discussion_r1125727946


##########
python/pyarrow/_exec_plan.pyx:
##########
@@ -390,6 +390,120 @@ def _perform_join(join_type, left_operand not None, 
left_keys,
     return result_table
 
 
+def _perform_join_asof(left_operand not None, left_on, left_by,
+                       right_operand not None, right_on, right_by,
+                       tolerance, output_type=Table):
+    """
+    Perform asof join of two tables or datasets.
+
+    The result will be an output table with the result of the join operation
+
+    Parameters
+    ----------
+    left_operand : Table or Dataset
+        The left operand for the join operation.
+    left_on : str
+        The left key (or keys) on which the join operation should be performed.
+    left_by: str or list[str]
+        The left key (or keys) on which the join operation should be performed.
+    right_operand : Table or Dataset
+        The right operand for the join operation.
+    right_on : str or list[str]
+        The right key (or keys) on which the join operation should be 
performed.
+    right_by: str or list[str]
+        The right key (or keys) on which the join operation should be 
performed.
+    tolerance : int
+        The tolerance to use for the asof join. The tolerance is interpreted in
+        the same units as the "on" key.
+    output_type: Table or InMemoryDataset
+        The output type for the exec plan result.
+
+    Returns
+    -------
+    result_table : Table or InMemoryDataset
+    """
+    cdef:
+        vector[CFieldRef] c_left_by
+        vector[CFieldRef] c_right_by
+        CAsofJoinKeys c_left_keys
+        CAsofJoinKeys c_right_keys
+        vector[CAsofJoinKeys] c_input_keys
+        vector[CDeclaration] c_decl_plan
+
+    # Prepare left AsofJoinNodeOption::Keys
+    if isinstance(left_by, str):
+        left_by = [left_by]
+    for key in left_by:
+        c_left_by.push_back(CFieldRef(<c_string>tobytes(key)))
+
+    c_left_keys.on_key = CFieldRef(<c_string>tobytes(left_on))
+    c_left_keys.by_key = c_left_by
+
+    c_input_keys.push_back(c_left_keys)
+
+    # Prepare right AsofJoinNodeOption::Keys
+    right_by_order = {}
+    if isinstance(right_by, str):
+        right_by = [right_by]
+    for key in right_by:
+        c_right_by.push_back(CFieldRef(<c_string>tobytes(key)))
+
+    c_right_keys.on_key = CFieldRef(<c_string>tobytes(right_on))
+    c_right_keys.by_key = c_right_by
+
+    c_input_keys.push_back(c_right_keys)
+
+    # By default expose all columns on both left and right table
+    if isinstance(left_operand, Table):
+        left_columns = left_operand.column_names
+    elif isinstance(left_operand, Dataset):
+        left_columns = left_operand.schema.names
+    else:
+        raise TypeError("Unsupported left join member type")

Review Comment:
   The `left_columns`/`right_columns` variables were also used to filter out
   the 'special' Dataset columns which we get back if the operands are
   datasets. This isn't currently an issue because the operands are
   temporarily converted to Tables, which is itself a workaround for the lack
   of ScanNodeOptions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to