HyukjinKwon commented on code in PR #45305:
URL: https://github.com/apache/spark/pull/45305#discussion_r1527735697
##########
python/pyspark/sql/datasource.py:
##########
@@ -516,6 +528,71 @@ def abort(self, messages: List["WriterCommitMessage"]) -> None:
...
+class DataSourceStreamWriter(ABC):
+ """
+ A base class for data stream writers. Data stream writers are responsible
for writing
+ the data to the streaming sink.
+
+ .. versionadded:: 4.0.0
+ """
+
+ @abstractmethod
+ def write(self, iterator: Iterator[Row]) -> "WriterCommitMessage":
+ """
+ Writes data into the streaming sink.
+
+ This method is called on executors to write data to the streaming data
sink in
+ each microbatch. It accepts an iterator of input data and returns a
single row
+ representing a commit message, or None if there is no commit message.
+
+ The driver collects commit messages, if any, from all executors and
passes them
+ to the ``commit`` method if all tasks run successfully. If any task
fails, the
+ ``abort`` method will be called with the collected commit messages.
+
+ Parameters
+ ----------
+ iterator : Iterator[Row]
+ An iterator of input data.
+
+ Returns
+ -------
+ WriterCommitMessage : a serializable commit message
+ """
+ ...
+
+ def commit(self, messages: List["WriterCommitMessage"], batchId: int) -> None:
+ """
+ Commits this microbatch with a list of commit messages.
+
+ This method is invoked on the driver when all tasks run successfully.
The
+ commit messages are collected from the ``write`` method call from each
task,
+ and are passed to this method. The implementation should use the
commit messages
+ to commit the microbatch in the streaming sink.
+
+ Parameters
+ ----------
+ messages : List[WriterCommitMessage]
+ A list of commit messages.
+ """
+ ...
+
+ def abort(self, messages: List["WriterCommitMessage"], batchId: int) -> None:
+ """
+ Aborts this microbatch due to task failures.
+
+ This method is invoked on the driver when one or more tasks failed.
The commit
+ messages are collected from the ``write`` method call from each task,
and are
+ passed to this method. The implementation should use the commit
messages to
+ abort the microbatch in the streaming sink.
+
+ Parameters
+ ----------
+ messages : List[WriterCommitMessage]
Review Comment:
`` list of :class:`WriterCommitMessage` ``
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]