alamb commented on code in PR #15557:
URL: https://github.com/apache/datafusion/pull/15557#discussion_r2026914599
##########
datafusion/physical-plan/src/joins/sort_merge_join.rs:
##########
@@ -823,42 +823,65 @@ impl BufferedBatch {
/// Sort-Merge join stream that consumes streamed and buffered data streams
/// and produces joined output stream.
struct SortMergeJoinStream {
- /// Current state of the stream
- pub state: SortMergeJoinState,
+ // ========================================================================
+ // PROPERTIES:
+ // These fields are initialized at the start and remain constant throughout
+ // the execution.
+ // ========================================================================
/// Output schema
pub schema: SchemaRef,
- /// Sort options of join columns used to sort streamed and buffered data stream
- pub sort_options: Vec<SortOptions>,
/// null == null?
pub null_equals_null: bool,
+ /// Sort options of join columns used to sort streamed and buffered data stream
+ pub sort_options: Vec<SortOptions>,
+ /// optional join filter
+ pub filter: Option<JoinFilter>,
+ /// How the join is performed
+ pub join_type: JoinType,
+ /// Target output batch size
+ pub batch_size: usize,
+
+ // ========================================================================
+ // STREAMED FIELDS:
+ // These fields manage the properties and state of the streamed input.
+ // ========================================================================
/// Input schema of streamed
pub streamed_schema: SchemaRef,
- /// Input schema of buffered
- pub buffered_schema: SchemaRef,
/// Streamed data stream
pub streamed: SendableRecordBatchStream,
- /// Buffered data stream
- pub buffered: SendableRecordBatchStream,
/// Current processing record batch of streamed
pub streamed_batch: StreamedBatch,
- /// Current buffered data
- pub buffered_data: BufferedData,
/// (used in outer join) Is current streamed row joined at least once?
pub streamed_joined: bool,
- /// (used in outer join) Is current buffered batches joined at least once?
- pub buffered_joined: bool,
/// State of streamed
pub streamed_state: StreamedState,
- /// State of buffered
- pub buffered_state: BufferedState,
- /// The comparison result of current streamed row and buffered batches
- pub current_ordering: Ordering,
/// Join key columns of streamed
pub on_streamed: Vec<PhysicalExprRef>,
+
+ // ========================================================================
Review Comment:
It might also help to put these fields into their own struct -- perhaps as
fields on `BufferedState`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]