amol- commented on code in PR #13409:
URL: https://github.com/apache/arrow/pull/13409#discussion_r1034589611


##########
python/pyarrow/_exec_plan.pyx:
##########
@@ -89,35 +93,42 @@ cdef execplan(inputs, output_type, vector[CDeclaration] 
plan, c_bool use_threads
     # Create source nodes for each input
     for ipt in inputs:
         if isinstance(ipt, Table):
-            node_factory = "table_source"
             c_in_table = pyarrow_unwrap_table(ipt)
             c_tablesourceopts = make_shared[CTableSourceNodeOptions](
                 c_in_table)
             c_input_node_opts = static_pointer_cast[CExecNodeOptions, 
CTableSourceNodeOptions](
                 c_tablesourceopts)
+
+            current_decl = CDeclaration(
+                tobytes("table_source"), no_c_inputs, c_input_node_opts)
         elif isinstance(ipt, Dataset):
-            node_factory = "scan"
             c_in_dataset = (<Dataset>ipt).unwrap()
             c_scanopts = make_shared[CScanNodeOptions](
-                c_in_dataset, make_shared[CScanOptions]())
-            deref(deref(c_scanopts).scan_options).use_threads = use_threads
+                c_in_dataset, Scanner._make_scan_options(ipt, {"use_threads": 
use_threads}))
             c_input_node_opts = static_pointer_cast[CExecNodeOptions, 
CScanNodeOptions](
                 c_scanopts)
+
+            # Filters applied in CScanNodeOptions are "best effort" for the 
scan node itself,
+            # so we always need to inject an additional Filter node to apply 
them for real.
+            current_decl = CDeclaration(tobytes("filter"), no_c_inputs,
+                                        static_pointer_cast[CExecNodeOptions, 
CFilterNodeOptions](
+                make_shared[CFilterNodeOptions](
+                    deref(deref(c_scanopts).scan_options).filter)
+            )
+            )
+            current_decl.inputs.push_back(
+                CDeclaration.Input(CDeclaration(
+                    tobytes("scan"), no_c_inputs, c_input_node_opts))

Review Comment:
   It does go first, in the sense that the Scan node is set as an `input` to 
the Filter node. That's why the Filter node is created first: so that the Scan 
node can then be appended to its inputs.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to