This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a1c96634bd Add more stream docs (#8192)
a1c96634bd is described below

commit a1c96634bd182e6cd90115544c7bdfeb30d752fb
Author: Raphael Taylor-Davies <[email protected]>
AuthorDate: Wed Nov 15 18:43:06 2023 +0000

    Add more stream docs (#8192)
---
 datafusion/core/src/datasource/stream.rs | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs
index cf95dd249a..fc19ff954d 100644
--- a/datafusion/core/src/datasource/stream.rs
+++ b/datafusion/core/src/datasource/stream.rs
@@ -104,6 +104,12 @@ pub struct StreamConfig {
 
 impl StreamConfig {
     /// Stream data from the file at `location`
+    ///
+    /// * Data will be read sequentially from the provided `location`
+    /// * New data will be appended to the end of the file
+    ///
+    /// The encoding can be configured with [`Self::with_encoding`] and
+    /// defaults to [`StreamEncoding::Csv`]
     pub fn new_file(schema: SchemaRef, location: PathBuf) -> Self {
         Self {
             schema,
@@ -180,11 +186,20 @@ impl StreamConfig {
     }
 }
 
-/// A [`TableProvider`] for a stream source, such as a FIFO file
+/// A [`TableProvider`] for an unbounded stream source
+///
+/// Currently only reading from / appending to a single file in-place is supported, but
+/// other stream sources and sinks may be added in future.
+///
+/// Applications looking to read/write datasets comprising multiple files, e.g. [Hadoop]-style
+/// data stored in object storage, should instead consider [`ListingTable`].
+///
+/// [Hadoop]: https://hadoop.apache.org/
+/// [`ListingTable`]: crate::datasource::listing::ListingTable
 pub struct StreamTable(Arc<StreamConfig>);
 
 impl StreamTable {
-    /// Create a new [`StreamTable`] for the given `StreamConfig`
+    /// Create a new [`StreamTable`] for the given [`StreamConfig`]
     pub fn new(config: Arc<StreamConfig>) -> Self {
         Self(config)
     }

Reply via email to