This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/asf-site by this push:
new ea96ee5bd92 deploy: 19a3bb0d264fdb06294ea8811c7e6514030f01dc
ea96ee5bd92 is described below
commit ea96ee5bd92259c268b6c167419196c190a81f20
Author: tustvold <[email protected]>
AuthorDate: Wed Mar 13 00:49:08 2024 +0000
deploy: 19a3bb0d264fdb06294ea8811c7e6514030f01dc
---
parquet/arrow/async_writer/index.html | 14 +-
.../async_writer/struct.AsyncArrowWriter.html | 62 ++---
search-index.js | 2 +-
src/parquet/arrow/async_writer/mod.rs.html | 268 +++++----------------
4 files changed, 90 insertions(+), 256 deletions(-)
diff --git a/parquet/arrow/async_writer/index.html
b/parquet/arrow/async_writer/index.html
index db2df345299..f8222c9f0b2 100644
--- a/parquet/arrow/async_writer/index.html
+++ b/parquet/arrow/async_writer/index.html
@@ -1,21 +1,15 @@
<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta
name="viewport" content="width=device-width, initial-scale=1.0"><meta
name="generator" content="rustdoc"><meta name="description" content="Contains
async writer which writes arrow data into parquet
data."><title>parquet::arrow::async_writer - Rust</title><link rel="preload"
as="font" type="font/woff2" crossorigin
href="../../../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link
rel="preload" as="font" type= [...]
- <main><div class="width-limiter"><nav class="sub"><form
class="search-form"><span></span><div id="sidebar-button" tabindex="-1"><a
href="../../../parquet/all.html" title="show sidebar"></a></div><input
class="search-input" name="search" aria-label="Run search in the documentation"
autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to
search, ‘?’ for more options…" type="search"><div id="help-button"
tabindex="-1"><a href="../../../help.html" title="help">?</a></div [...]
+ <main><div class="width-limiter"><nav class="sub"><form
class="search-form"><span></span><div id="sidebar-button" tabindex="-1"><a
href="../../../parquet/all.html" title="show sidebar"></a></div><input
class="search-input" name="search" aria-label="Run search in the documentation"
autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to
search, ‘?’ for more options…" type="search"><div id="help-button"
tabindex="-1"><a href="../../../help.html" title="help">?</a></div [...]
<p>Provides <code>async</code> API for writing [<code>RecordBatch</code>]es as
parquet files. The API is
similar to the <a href="../arrow_writer/struct.ArrowWriter.html" title="struct
parquet::arrow::arrow_writer::ArrowWriter"><code>sync</code> API</a>, so please
read the documentation there before using this API.</p>
<p>Here is an example for using <a href="struct.AsyncArrowWriter.html"
title="struct
parquet::arrow::async_writer::AsyncArrowWriter"><code>AsyncArrowWriter</code></a>:</p>
-<div class="example-wrap"><pre class="rust rust-example-rendered"><code><span
class="kw">use </span>std::sync::Arc;
-<span class="kw">use </span>arrow_array::{ArrayRef, Int64Array, RecordBatch,
RecordBatchReader};
-<span class="kw">use </span>bytes::Bytes;
-<span class="kw">use </span>parquet::arrow::{AsyncArrowWriter,
arrow_reader::ParquetRecordBatchReaderBuilder};
-
-<span class="kw">let </span>col = Arc::new(Int64Array::from_iter_values([<span
class="number">1</span>, <span class="number">2</span>, <span
class="number">3</span>])) <span class="kw">as </span>ArrayRef;
+<div class="example-wrap"><pre class="rust rust-example-rendered"><code><span
class="kw">let </span>col = Arc::new(Int64Array::from_iter_values([<span
class="number">1</span>, <span class="number">2</span>, <span
class="number">3</span>])) <span class="kw">as </span>ArrayRef;
<span class="kw">let </span>to_write = RecordBatch::try_from_iter([(<span
class="string">"col"</span>, col)]).unwrap();
<span class="kw">let </span><span class="kw-2">mut </span>buffer = Vec::new();
-<span class="kw">let </span><span class="kw-2">mut </span>writer =
- AsyncArrowWriter::try_new(<span class="kw-2">&mut </span>buffer,
to_write.schema(), <span class="number">0</span>, <span
class="prelude-val">None</span>).unwrap();
+<span class="kw">let </span><span class="kw-2">mut </span>writer =
AsyncArrowWriter::try_new(<span class="kw-2">&mut </span>buffer,
to_write.schema(), <span class="prelude-val">None</span>).unwrap();
writer.write(<span class="kw-2">&</span>to_write).<span
class="kw">await</span>.unwrap();
writer.close().<span class="kw">await</span>.unwrap();
@@ -27,4 +21,4 @@ writer.close().<span class="kw">await</span>.unwrap();
<span class="kw">let </span>read = reader.next().unwrap().unwrap();
<span class="macro">assert_eq!</span>(to_write, read);</code></pre></div>
-</div></details><h2 id="structs" class="section-header">Structs<a
href="#structs" class="anchor">§</a></h2><ul class="item-table"><li><div
class="item-name"><a class="struct" href="struct.AsyncArrowWriter.html"
title="struct
parquet::arrow::async_writer::AsyncArrowWriter">AsyncArrowWriter</a></div><div
class="desc docblock-short">Async arrow
writer.</div></li></ul></section></div></main></body></html>
\ No newline at end of file
+</div></details><h2 id="structs" class="section-header">Structs<a
href="#structs" class="anchor">§</a></h2><ul class="item-table"><li><div
class="item-name"><a class="struct" href="struct.AsyncArrowWriter.html"
title="struct
parquet::arrow::async_writer::AsyncArrowWriter">AsyncArrowWriter</a></div><div
class="desc docblock-short">Encodes [<code>RecordBatch</code>] to parquet,
outputting to an
[<code>AsyncWrite</code>]</div></li></ul></section></div></main></body></html>
\ No newline at end of file
diff --git a/parquet/arrow/async_writer/struct.AsyncArrowWriter.html
b/parquet/arrow/async_writer/struct.AsyncArrowWriter.html
index 4ab5239de34..7bdf81030b6 100644
--- a/parquet/arrow/async_writer/struct.AsyncArrowWriter.html
+++ b/parquet/arrow/async_writer/struct.AsyncArrowWriter.html
@@ -1,18 +1,17 @@
-<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta
name="viewport" content="width=device-width, initial-scale=1.0"><meta
name="generator" content="rustdoc"><meta name="description" content="Async
arrow writer."><title>AsyncArrowWriter in parquet::arrow::async_writer -
Rust</title><link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link
rel="preload" as="font" type="font/woff2" crossorigin [...]
- <main><div class="width-limiter"><nav class="sub"><form
class="search-form"><span></span><div id="sidebar-button" tabindex="-1"><a
href="../../../parquet/all.html" title="show sidebar"></a></div><input
class="search-input" name="search" aria-label="Run search in the documentation"
autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to
search, ‘?’ for more options…" type="search"><div id="help-button"
tabindex="-1"><a href="../../../help.html" title="help">?</a></div [...]
+<!DOCTYPE html><html lang="en"><head><meta charset="utf-8"><meta
name="viewport" content="width=device-width, initial-scale=1.0"><meta
name="generator" content="rustdoc"><meta name="description" content="Encodes
`RecordBatch` to parquet, outputting to an
`AsyncWrite`"><title>AsyncArrowWriter in parquet::arrow::async_writer -
Rust</title><link rel="preload" as="font" type="font/woff2" crossorigin
href="../../../static.files/SourceSerif4-Regular-46f98efaafac5295.ttf.woff2"><link
rel="prelo [...]
+ <main><div class="width-limiter"><nav class="sub"><form
class="search-form"><span></span><div id="sidebar-button" tabindex="-1"><a
href="../../../parquet/all.html" title="show sidebar"></a></div><input
class="search-input" name="search" aria-label="Run search in the documentation"
autocomplete="off" spellcheck="false" placeholder="Click or press ‘S’ to
search, ‘?’ for more options…" type="search"><div id="help-button"
tabindex="-1"><a href="../../../help.html" title="help">?</a></div [...]
sync_writer: <a class="struct"
href="../arrow_writer/struct.ArrowWriter.html" title="struct
parquet::arrow::arrow_writer::ArrowWriter">ArrowWriter</a><<a class="struct"
href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html"
title="struct alloc::vec::Vec">Vec</a><<a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.u8.html">u8</a>>>,
async_writer: W,
- buffer_size: <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a>,
-}</code></pre><details class="toggle top-doc" open><summary
class="hideme"><span>Expand description</span></summary><div
class="docblock"><p>Async arrow writer.</p>
-<p>It is implemented based on the sync writer <a
href="../arrow_writer/struct.ArrowWriter.html" title="struct
parquet::arrow::arrow_writer::ArrowWriter"><code>ArrowWriter</code></a> with an
inner buffer.
-The buffered data will be flushed to the writer provided by caller when the
-buffer’s threshold is exceeded.</p>
-<h3 id="memory-limiting"><a class="doc-anchor"
href="#memory-limiting">§</a>Memory Limiting</h3>
-<p>The nature of parquet forces buffering of an entire row group before it can
be flushed
-to the underlying writer. This buffering may exceed the configured buffer size
-of <a href="struct.AsyncArrowWriter.html" title="struct
parquet::arrow::async_writer::AsyncArrowWriter"><code>AsyncArrowWriter</code></a>.
Memory usage can be limited by prematurely flushing the row group,
-although this will have implications for file size and query performance. See
<a href="../arrow_writer/struct.ArrowWriter.html" title="struct
parquet::arrow::arrow_writer::ArrowWriter">ArrowWriter</a>
-for more information.</p>
+}</code></pre><details class="toggle top-doc" open><summary
class="hideme"><span>Expand description</span></summary><div
class="docblock"><p>Encodes [<code>RecordBatch</code>] to parquet, outputting
to an [<code>AsyncWrite</code>]</p>
+<h3 id="memory-usage"><a class="doc-anchor" href="#memory-usage">§</a>Memory
Usage</h3>
+<p>This writer eagerly writes data as soon as possible to the underlying
[<code>AsyncWrite</code>],
+permitting fine-grained control over buffering and I/O scheduling. However,
the columnar
+nature of parquet forces data for an entire row group to be buffered in
memory, before
+it can be flushed. Depending on the data and the configured row group size,
this buffering
+may be substantial.</p>
+<p>Memory usage can be limited by calling <a
href="struct.AsyncArrowWriter.html#method.flush" title="method
parquet::arrow::async_writer::AsyncArrowWriter::flush"><code>Self::flush</code></a>
to flush the in progress row group,
+although this will likely increase overall file size and reduce query
performance.
+See <a href="../arrow_writer/struct.ArrowWriter.html" title="struct
parquet::arrow::arrow_writer::ArrowWriter">ArrowWriter</a> for more
information.</p>
<div class="example-wrap"><pre class="rust rust-example-rendered"><code><span
class="kw">let </span><span class="kw-2">mut </span>writer:
AsyncArrowWriter<File> = <span class="macro">todo!</span>();
<span class="kw">let </span>batch: RecordBatch = <span
class="macro">todo!</span>();
@@ -23,40 +22,29 @@ writer.write(<span class="kw-2">&</span>batch).<span
class="kw">await</span>
}</code></pre></div>
</div></details><h2 id="fields" class="fields section-header">Fields<a
href="#fields" class="anchor">§</a></h2><span id="structfield.sync_writer"
class="structfield section-header"><a href="#structfield.sync_writer"
class="anchor field">§</a><code>sync_writer: <a class="struct"
href="../arrow_writer/struct.ArrowWriter.html" title="struct
parquet::arrow::arrow_writer::ArrowWriter">ArrowWriter</a><<a class="struct"
href="https://doc.rust-lang.org/nightly/alloc/vec/struct.Vec.html" title [...]
</div><span id="structfield.async_writer" class="structfield
section-header"><a href="#structfield.async_writer" class="anchor
field">§</a><code>async_writer: W</code></span><div class="docblock"><p>Async
writer provided by caller</p>
-</div><span id="structfield.buffer_size" class="structfield section-header"><a
href="#structfield.buffer_size" class="anchor field">§</a><code>buffer_size: <a
class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a></code></span><div
class="docblock"><p>Trigger forced flushing once buffer size reaches this
value</p>
-</div><h2 id="implementations" class="section-header">Implementations<a
href="#implementations" class="anchor">§</a></h2><div
id="implementations-list"><details class="toggle implementors-toggle"
open><summary><section id="impl-AsyncArrowWriter%3CW%3E" class="impl"><a
class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#104-226">source</a><a
href="#impl-AsyncArrowWriter%3CW%3E" class="anchor">§</a><h3
class="code-header">impl<W: AsyncWrite + <a class="trait" [...]
+</div><h2 id="implementations" class="section-header">Implementations<a
href="#implementations" class="anchor">§</a></h2><div
id="implementations-list"><details class="toggle implementors-toggle"
open><summary><section id="impl-AsyncArrowWriter%3CW%3E" class="impl"><a
class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#100-205">source</a><a
href="#impl-AsyncArrowWriter%3CW%3E" class="anchor">§</a><h3
class="code-header">impl<W: AsyncWrite + <a class="trait" [...]
writer: W,
arrow_schema: SchemaRef,
- buffer_size: <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a>,
props: <a class="enum"
href="https://doc.rust-lang.org/nightly/core/option/enum.Option.html"
title="enum core::option::Option">Option</a><<a class="struct"
href="../../file/properties/struct.WriterProperties.html" title="struct
parquet::file::properties::WriterProperties">WriterProperties</a>>
-) -> <a class="type" href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><Self></h4></section></summary><div
class="docblock"><p>Try to create a new Async Arrow Writer.</p>
-<p><code>buffer_size</code> determines the minimum number of bytes to buffer
before flushing
-to the underlying [<code>AsyncWrite</code>]. However, the nature of writing
parquet may
-force buffering of data in excess of this within the underlying <a
href="../arrow_writer/struct.ArrowWriter.html" title="struct
parquet::arrow::arrow_writer::ArrowWriter"><code>ArrowWriter</code></a>.
-See the documentation on <a href="../arrow_writer/struct.ArrowWriter.html"
title="struct
parquet::arrow::arrow_writer::ArrowWriter"><code>ArrowWriter</code></a> for
more details</p>
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.try_new_with_options" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#127-144">source</a><h4
class="code-header">pub fn <a href="#method.try_new_with_options"
class="fn">try_new_with_options</a>(
+) -> <a class="type" href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><Self></h4></section></summary><div
class="docblock"><p>Try to create a new Async Arrow Writer</p>
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.try_new_with_options" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#112-123">source</a><h4
class="code-header">pub fn <a href="#method.try_new_with_options"
class="fn">try_new_with_options</a>(
writer: W,
arrow_schema: SchemaRef,
- buffer_size: <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a>,
options: <a class="struct"
href="../arrow_writer/struct.ArrowWriterOptions.html" title="struct
parquet::arrow::arrow_writer::ArrowWriterOptions">ArrowWriterOptions</a>
-) -> <a class="type" href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><Self></h4></section></summary><div
class="docblock"><p>Try to create a new Async Arrow Writer with <a
href="../arrow_writer/struct.ArrowWriterOptions.html" title="struct
parquet::arrow::arrow_writer::ArrowWriterOptions"><code>ArrowWriterOptions</code></a>.</p>
-<p><code>buffer_size</code> determines the minimum number of bytes to buffer
before flushing
-to the underlying [<code>AsyncWrite</code>]. However, the nature of writing
parquet may
-force buffering of data in excess of this within the underlying <a
href="../arrow_writer/struct.ArrowWriter.html" title="struct
parquet::arrow::arrow_writer::ArrowWriter"><code>ArrowWriter</code></a>.
-See the documentation on <a href="../arrow_writer/struct.ArrowWriter.html"
title="struct
parquet::arrow::arrow_writer::ArrowWriter"><code>ArrowWriter</code></a> for
more details</p>
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.flushed_row_groups" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#147-149">source</a><h4
class="code-header">pub fn <a href="#method.flushed_row_groups"
class="fn">flushed_row_groups</a>(&self) -> &[<a class="type"
href="../../file/metadata/type.RowGroupMetaDataPtr.html" title="type
parquet::file::metadata::RowGroupMetaDataPtr">RowGroupMet [...]
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.in_progress_size" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#152-154">source</a><h4
class="code-header">pub fn <a href="#method.in_progress_size"
class="fn">in_progress_size</a>(&self) -> <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a></h4></section></summary><div
class="docblock"><p>Returns [...]
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.in_progress_rows" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#157-159">source</a><h4
class="code-header">pub fn <a href="#method.in_progress_rows"
class="fn">in_progress_rows</a>(&self) -> <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a></h4></section></summary><div
class="docblock"><p>Returns [...]
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.bytes_written" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#162-164">source</a><h4
class="code-header">pub fn <a href="#method.bytes_written"
class="fn">bytes_written</a>(&self) -> <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a></h4></section></summary><div
class="docblock"><p>Returns the numbe [...]
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.write" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#170-173">source</a><h4
class="code-header">pub async fn <a href="#method.write"
class="fn">write</a>(&mut self, batch: &RecordBatch) -> <a
class="type" href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><<a class="primitive"
href="https://doc.rust-lang [...]
+) -> <a class="type" href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><Self></h4></section></summary><div
class="docblock"><p>Try to create a new Async Arrow Writer with <a
href="../arrow_writer/struct.ArrowWriterOptions.html" title="struct
parquet::arrow::arrow_writer::ArrowWriterOptions"><code>ArrowWriterOptions</code></a></p>
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.flushed_row_groups" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#126-128">source</a><h4
class="code-header">pub fn <a href="#method.flushed_row_groups"
class="fn">flushed_row_groups</a>(&self) -> &[<a class="type"
href="../../file/metadata/type.RowGroupMetaDataPtr.html" title="type
parquet::file::metadata::RowGroupMetaDataPtr">RowGroupMet [...]
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.in_progress_size" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#131-133">source</a><h4
class="code-header">pub fn <a href="#method.in_progress_size"
class="fn">in_progress_size</a>(&self) -> <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a></h4></section></summary><div
class="docblock"><p>Returns [...]
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.in_progress_rows" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#136-138">source</a><h4
class="code-header">pub fn <a href="#method.in_progress_rows"
class="fn">in_progress_rows</a>(&self) -> <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a></h4></section></summary><div
class="docblock"><p>Returns [...]
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.bytes_written" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#141-143">source</a><h4
class="code-header">pub fn <a href="#method.bytes_written"
class="fn">bytes_written</a>(&self) -> <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.usize.html">usize</a></h4></section></summary><div
class="docblock"><p>Returns the numbe [...]
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.write" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#149-156">source</a><h4
class="code-header">pub async fn <a href="#method.write"
class="fn">write</a>(&mut self, batch: &RecordBatch) -> <a
class="type" href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><<a class="primitive"
href="https://doc.rust-lang [...]
<p>After every sync write by the inner <a
href="../arrow_writer/struct.ArrowWriter.html" title="struct
parquet::arrow::arrow_writer::ArrowWriter">ArrowWriter</a>, the inner buffer
will be
checked and flush if at least half full</p>
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.flush" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#176-181">source</a><h4
class="code-header">pub async fn <a href="#method.flush"
class="fn">flush</a>(&mut self) -> <a class="type"
href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><<a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitiv [...]
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.append_key_value_metadata" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#186-188">source</a><h4
class="code-header">pub fn <a href="#method.append_key_value_metadata"
class="fn">append_key_value_metadata</a>(&mut self, kv_metadata: <a
class="struct" href="../../format/struct.KeyValue.html" title="struct
parquet::format::KeyValue">KeyValue</a>)</h [...]
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.flush" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#159-164">source</a><h4
class="code-header">pub async fn <a href="#method.flush"
class="fn">flush</a>(&mut self) -> <a class="type"
href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><<a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitiv [...]
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.append_key_value_metadata" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#169-171">source</a><h4
class="code-header">pub fn <a href="#method.append_key_value_metadata"
class="fn">append_key_value_metadata</a>(&mut self, kv_metadata: <a
class="struct" href="../../format/struct.KeyValue.html" title="struct
parquet::format::KeyValue">KeyValue</a>)</h [...]
<p>This method allows to append metadata after [<code>RecordBatch</code>]es
are written.</p>
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.close" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#193-201">source</a><h4
class="code-header">pub async fn <a href="#method.close"
class="fn">close</a>(self) -> <a class="type"
href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><<a class="struct"
href="../../format/struct.FileMetaData.html" title="struct parqu [...]
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.close" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#176-184">source</a><h4
class="code-header">pub async fn <a href="#method.close"
class="fn">close</a>(self) -> <a class="type"
href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><<a class="struct"
href="../../format/struct.FileMetaData.html" title="struct parqu [...]
<p>All the data in the inner buffer will be force flushed.</p>
-</div></details><details class="toggle method-toggle" open><summary><section
id="method.try_flush" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#204-225">source</a><h4
class="code-header">async fn <a href="#method.try_flush"
class="fn">try_flush</a>(&mut self, force: <a class="primitive"
href="https://doc.rust-lang.org/nightly/std/primitive.bool.html">bool</a>)
-> <a class="type" href="../../errors/type.Result.html" title="type p [...]
+</div></details><details class="toggle method-toggle" open><summary><section
id="method.do_write" class="method"><a class="src rightside"
href="../../../src/parquet/arrow/async_writer/mod.rs.html#187-204">source</a><h4
class="code-header">async fn <a href="#method.do_write"
class="fn">do_write</a>(&mut self) -> <a class="type"
href="../../errors/type.Result.html" title="type
parquet::errors::Result">Result</a><<a class="primitive"
href="https://doc.rust-lang.org/nightly/std/pri [...]
</div></details></div></details></div><h2 id="synthetic-implementations"
class="section-header">Auto Trait Implementations<a
href="#synthetic-implementations" class="anchor">§</a></h2><div
id="synthetic-implementations-list"><section
id="impl-Freeze-for-AsyncArrowWriter%3CW%3E" class="impl"><a
href="#impl-Freeze-for-AsyncArrowWriter%3CW%3E" class="anchor">§</a><h3
class="code-header">impl<W> <a class="trait"
href="https://doc.rust-lang.org/nightly/core/marker/trait.Freeze.html" tit [...]
W: <a class="trait"
href="https://doc.rust-lang.org/nightly/core/marker/trait.Freeze.html"
title="trait core::marker::Freeze">Freeze</a>,</div></h3></section><section
id="impl-RefUnwindSafe-for-AsyncArrowWriter%3CW%3E" class="impl"><a
href="#impl-RefUnwindSafe-for-AsyncArrowWriter%3CW%3E" class="anchor">§</a><h3
class="code-header">impl<W> !<a class="trait"
href="https://doc.rust-lang.org/nightly/core/panic/unwind_safe/trait.RefUnwindSafe.html"
title="trait core::panic::unwind_ [...]
W: <a class="trait"
href="https://doc.rust-lang.org/nightly/core/marker/trait.Send.html"
title="trait core::marker::Send">Send</a>,</div></h3></section><section
id="impl-Sync-for-AsyncArrowWriter%3CW%3E" class="impl"><a
href="#impl-Sync-for-AsyncArrowWriter%3CW%3E" class="anchor">§</a><h3
class="code-header">impl<W> !<a class="trait"
href="https://doc.rust-lang.org/nightly/core/marker/trait.Sync.html"
title="trait core::marker::Sync">Sync</a> for <a class="struct" href="struct.
[...]
diff --git a/search-index.js b/search-index.js
index def7d965775..a1972237a43 100644
--- a/search-index.js
+++ b/search-index.js
@@ -24,7 +24,7 @@ var searchIndex = new Map(JSON.parse('[\
["flight_test_integration_client",{"doc":"","t":"FPPIPPIGNNNNNNNNNNNNNNNNNNONNNNHOOONNNNNNNNNNNNN","n":["Args","AuthBasicProto","Err","Error","Middleware","Ok","Result","Scenario","augment_args","augment_args_for_update","borrow","borrow","borrow_mut","borrow_mut","clone","clone_into","command","command_for_update","fmt","fmt","from","from","from_arg_matches","from_arg_matches_mut","from_ref","group_id","host","into","into","into_request","into_request","main","path","port","scenario","t
[...]
["flight_test_integration_server",{"doc":"","t":"FPPIPPIGNNNNNNNNNNNNNNNNNNNNNNHOONNNNNNNNNNNNN","n":["Args","AuthBasicProto","Err","Error","Middleware","Ok","Result","Scenario","augment_args","augment_args_for_update","borrow","borrow","borrow_mut","borrow_mut","clone","clone_into","command","command_for_update","fmt","fmt","from","from","from_arg_matches","from_arg_matches_mut","from_ref","group_id","into","into","into_request","into_request","main","port","scenario","to_owned","to_pos
[...]
["gen",{"doc":"","t":"HH","n":["main","prost_config"],"q":[[0,"gen"],[2,"core::error"],[3,"alloc::boxed"],[4,"core::result"],[5,"prost_build"]],"d":["",""],"i":[0,0],"f":"{{}{{h{b{f{d}}}}}}{{}j}","c":[],"p":[[1,"unit"],[10,"Error",2],[5,"Box",3],[6,"Result",4],[5,"Config",5]],"b":[]}],\
-["parquet",{"doc":"This crate contains the official Native Rust
…","t":"CCCCCCQCCCCCSEEFSEFNCHCCCNNNNCNNNNCONNNNNNNNOOHHHCNNNNNNNNNKFFFFFIFFFKFFHOOOONNNNNNNNNNNNNNNNNNNMNONMHOOOOCOONNNNNNNNNOONNNNNNNNOONNNNOOOOONNNNNNNNNNMNOOONNOMOOOOOOOOOONNNOOOOCOOOOHOONNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKFFNNNNMNONNNNNNOMNONNNNNNFFNNNNNNNNNNNNNNNNNNNNNNNNHNNNNNNNONNONNONNNNNNNNNNFFFFGFFFFFPPIONNONNNNNNNNNNNNNNNNNNNNOOCNONNNNNNNOHOONNNNNNNNNNNNNNNNNNNNNHHNHHHNHHHHNONNNNNNNNNNNNNNNNOCONNOOONNNOONNNNNNNN
[...]
+["parquet",{"doc":"This crate contains the official Native Rust
…","t":"CCCCCCQCCCCCSEEFSEFNCHCCCNNNNCNNNNCONNNNNNNNOOHHHCNNNNNNNNNKFFFFFIFFFKFFHOOOONNNNNNNNNNNNNNNNNNNMNONMHOOOOCOONNNNNNNNNOONNNNNNNNOONNNNOOOOONNNNNNNNNNMNOOONNOMOOOOOOOOOONNNOOOOCOOOOHOONNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKFFNNNNMNONNNNNNOMNONNNNNNFFNNNNNNNNNNNNNNNNNNNNNNNNHNNNNNNNONNONNONNNNNNNNNNFFFFGFFFFFPPIONNONNNNNNNNNNNNNNNNNNNNOOCNONNNNNNNOHOONNNNNNNNNNNNNNNNNNNNNHHNHHHNHHHHNONNNNNNNNNNNNNNNNOCONNOOONNNOONNNNNNNN
[...]
["parquet_concat",{"doc":"Binary that concatenates the column data of one or
more
…","t":"FNNNNNNNNNNNONHONNNNNN","n":["Args","augment_args","augment_args_for_update","borrow","borrow_mut","command","command_for_update","fmt","from","from_arg_matches","from_arg_matches_mut","group_id","input","into","main","output","run","try_from","try_into","type_id","update_from_arg_matches","update_from_arg_matches_mut"],"q":[[0,"parquet_concat"],[22,"clap_builder::builder::command"],[23,"core::fmt"]
[...]
["parquet_derive",{"doc":"","t":"YYCPPPFPPPGGPPPNNNNNNNNNNNNNNNNNNNNNNNNNNNONNNNONNNNNNNNNNONNNNNNONNNN","n":["ParquetRecordReader","ParquetRecordWriter","parquet_field","Array","ChronoNaiveDate","ChronoNaiveDateTime","Field","Option","Reference","Slice","ThirdPartyType","Type","TypePath","Uuid","Vec","borrow","borrow","borrow","borrow_mut","borrow_mut","borrow_mut","column_reader","column_writer","converted_type","copied_direct_fields","copied_direct_vals","eq","eq","eq","fmt","fmt","fm
[...]
["parquet_derive_test",{"doc":"","t":"FFOOOOONNNNOOOOOOOOOONOONNNOOOONNOOOOOOOOOOOOOONNNNNNOO","n":["ACompleteRecord","APartiallyCompleteRecord","a_bool","a_borrowed_string","a_str","a_string","bool","borrow","borrow","borrow_mut","borrow_mut","borrowed_byte_vec","borrowed_maybe_a_str","borrowed_maybe_a_string","borrowed_maybe_borrowed_byte_vec","borrowed_maybe_byte_vec","byte_vec","byte_vec","date","double","double","eq","float","float","fmt","from","from","i16","i16","i32","i32","into"
[...]
diff --git a/src/parquet/arrow/async_writer/mod.rs.html
b/src/parquet/arrow/async_writer/mod.rs.html
index f21adec18ac..b9d739ad894 100644
--- a/src/parquet/arrow/async_writer/mod.rs.html
+++ b/src/parquet/arrow/async_writer/mod.rs.html
@@ -391,80 +391,6 @@
<a href="#389" id="389">389</a>
<a href="#390" id="390">390</a>
<a href="#391" id="391">391</a>
-<a href="#392" id="392">392</a>
-<a href="#393" id="393">393</a>
-<a href="#394" id="394">394</a>
-<a href="#395" id="395">395</a>
-<a href="#396" id="396">396</a>
-<a href="#397" id="397">397</a>
-<a href="#398" id="398">398</a>
-<a href="#399" id="399">399</a>
-<a href="#400" id="400">400</a>
-<a href="#401" id="401">401</a>
-<a href="#402" id="402">402</a>
-<a href="#403" id="403">403</a>
-<a href="#404" id="404">404</a>
-<a href="#405" id="405">405</a>
-<a href="#406" id="406">406</a>
-<a href="#407" id="407">407</a>
-<a href="#408" id="408">408</a>
-<a href="#409" id="409">409</a>
-<a href="#410" id="410">410</a>
-<a href="#411" id="411">411</a>
-<a href="#412" id="412">412</a>
-<a href="#413" id="413">413</a>
-<a href="#414" id="414">414</a>
-<a href="#415" id="415">415</a>
-<a href="#416" id="416">416</a>
-<a href="#417" id="417">417</a>
-<a href="#418" id="418">418</a>
-<a href="#419" id="419">419</a>
-<a href="#420" id="420">420</a>
-<a href="#421" id="421">421</a>
-<a href="#422" id="422">422</a>
-<a href="#423" id="423">423</a>
-<a href="#424" id="424">424</a>
-<a href="#425" id="425">425</a>
-<a href="#426" id="426">426</a>
-<a href="#427" id="427">427</a>
-<a href="#428" id="428">428</a>
-<a href="#429" id="429">429</a>
-<a href="#430" id="430">430</a>
-<a href="#431" id="431">431</a>
-<a href="#432" id="432">432</a>
-<a href="#433" id="433">433</a>
-<a href="#434" id="434">434</a>
-<a href="#435" id="435">435</a>
-<a href="#436" id="436">436</a>
-<a href="#437" id="437">437</a>
-<a href="#438" id="438">438</a>
-<a href="#439" id="439">439</a>
-<a href="#440" id="440">440</a>
-<a href="#441" id="441">441</a>
-<a href="#442" id="442">442</a>
-<a href="#443" id="443">443</a>
-<a href="#444" id="444">444</a>
-<a href="#445" id="445">445</a>
-<a href="#446" id="446">446</a>
-<a href="#447" id="447">447</a>
-<a href="#448" id="448">448</a>
-<a href="#449" id="449">449</a>
-<a href="#450" id="450">450</a>
-<a href="#451" id="451">451</a>
-<a href="#452" id="452">452</a>
-<a href="#453" id="453">453</a>
-<a href="#454" id="454">454</a>
-<a href="#455" id="455">455</a>
-<a href="#456" id="456">456</a>
-<a href="#457" id="457">457</a>
-<a href="#458" id="458">458</a>
-<a href="#459" id="459">459</a>
-<a href="#460" id="460">460</a>
-<a href="#461" id="461">461</a>
-<a href="#462" id="462">462</a>
-<a href="#463" id="463">463</a>
-<a href="#464" id="464">464</a>
-<a href="#465" id="465">465</a>
</pre></div><pre class="rust"><code><span class="comment">// Licensed to the
Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
@@ -493,17 +419,16 @@
//! # #[tokio::main(flavor="current_thread")]
//! # async fn main() {
//! #
-//! use std::sync::Arc;
-//! use arrow_array::{ArrayRef, Int64Array, RecordBatch, RecordBatchReader};
-//! use bytes::Bytes;
-//! use parquet::arrow::{AsyncArrowWriter,
arrow_reader::ParquetRecordBatchReaderBuilder};
-//!
+//! # use std::sync::Arc;
+//! # use arrow_array::{ArrayRef, Int64Array, RecordBatch, RecordBatchReader};
+//! # use bytes::Bytes;
+//! # use parquet::arrow::{AsyncArrowWriter,
arrow_reader::ParquetRecordBatchReaderBuilder};
+//! #
//! let col = Arc::new(Int64Array::from_iter_values([1, 2, 3])) as ArrayRef;
//! let to_write = RecordBatch::try_from_iter([("col", col)]).unwrap();
//!
//! let mut buffer = Vec::new();
-//! let mut writer =
-//! AsyncArrowWriter::try_new(&mut buffer, to_write.schema(), 0,
None).unwrap();
+//! let mut writer = AsyncArrowWriter::try_new(&mut buffer,
to_write.schema(), None).unwrap();
//! writer.write(&to_write).await.unwrap();
//! writer.close().await.unwrap();
//!
@@ -529,19 +454,19 @@
<span class="kw">use </span>arrow_schema::SchemaRef;
<span class="kw">use </span>tokio::io::{AsyncWrite, AsyncWriteExt};
-<span class="doccomment">/// Async arrow writer.
+<span class="doccomment">/// Encodes [`RecordBatch`] to parquet, outputting to
an [`AsyncWrite`]
///
-/// It is implemented based on the sync writer [`ArrowWriter`] with an inner
buffer.
-/// The buffered data will be flushed to the writer provided by caller when the
-/// buffer's threshold is exceeded.
+/// ## Memory Usage
///
-/// ## Memory Limiting
+/// This writer eagerly writes data as soon as possible to the underlying
[`AsyncWrite`],
+/// permitting fine-grained control over buffering and I/O scheduling.
However, the columnar
+/// nature of parquet forces data for an entire row group to be buffered in
memory, before
+/// it can be flushed. Depending on the data and the configured row group
size, this buffering
+/// may be substantial.
///
-/// The nature of parquet forces buffering of an entire row group before it
can be flushed
-/// to the underlying writer. This buffering may exceed the configured buffer
size
-/// of [`AsyncArrowWriter`]. Memory usage can be limited by prematurely
flushing the row group,
-/// although this will have implications for file size and query performance.
See [ArrowWriter]
-/// for more information.
+/// Memory usage can be limited by calling [`Self::flush`] to flush the in
progress row group,
+/// although this will likely increase overall file size and reduce query
performance.
+/// See [ArrowWriter] for more information.
///
/// ```no_run
/// # use tokio::fs::File;
@@ -563,50 +488,30 @@
<span class="doccomment">/// Async writer provided by caller
</span>async_writer: W,
-
- <span class="doccomment">/// Trigger forced flushing once buffer size
reaches this value
- </span>buffer_size: usize,
}
<span class="kw">impl</span><W: AsyncWrite + Unpin + Send>
AsyncArrowWriter<W> {
- <span class="doccomment">/// Try to create a new Async Arrow Writer.
- ///
- /// `buffer_size` determines the minimum number of bytes to buffer before
flushing
- /// to the underlying [`AsyncWrite`]. However, the nature of writing
parquet may
- /// force buffering of data in excess of this within the underlying
[`ArrowWriter`].
- /// See the documentation on [`ArrowWriter`] for more details
+ <span class="doccomment">/// Try to create a new Async Arrow Writer
</span><span class="kw">pub fn </span>try_new(
writer: W,
arrow_schema: SchemaRef,
- buffer_size: usize,
props: <span class="prelude-ty">Option</span><WriterProperties>,
) -> <span class="prelude-ty">Result</span><<span
class="self">Self</span>> {
<span class="kw">let </span>options =
ArrowWriterOptions::new().with_properties(props.unwrap_or_default());
- <span class="self">Self</span>::try_new_with_options(writer,
arrow_schema, buffer_size, options)
+ <span class="self">Self</span>::try_new_with_options(writer,
arrow_schema, options)
}
- <span class="doccomment">/// Try to create a new Async Arrow Writer with
[`ArrowWriterOptions`].
- ///
- /// `buffer_size` determines the minimum number of bytes to buffer before
flushing
- /// to the underlying [`AsyncWrite`]. However, the nature of writing
parquet may
- /// force buffering of data in excess of this within the underlying
[`ArrowWriter`].
- /// See the documentation on [`ArrowWriter`] for more details
+ <span class="doccomment">/// Try to create a new Async Arrow Writer with
[`ArrowWriterOptions`]
</span><span class="kw">pub fn </span>try_new_with_options(
writer: W,
arrow_schema: SchemaRef,
- buffer_size: usize,
options: ArrowWriterOptions,
) -> <span class="prelude-ty">Result</span><<span
class="self">Self</span>> {
- <span class="kw">let </span>sync_writer =
ArrowWriter::try_new_with_options(
- Vec::with_capacity(buffer_size),
- arrow_schema,
- options,
- )<span class="question-mark">?</span>;
+ <span class="kw">let </span>sync_writer =
ArrowWriter::try_new_with_options(Vec::new(), arrow_schema, options)<span
class="question-mark">?</span>;
<span class="prelude-val">Ok</span>(<span class="self">Self </span>{
sync_writer,
async_writer: writer,
- buffer_size,
})
}
@@ -635,14 +540,18 @@
/// After every sync write by the inner [ArrowWriter], the inner buffer
will be
/// checked and flush if at least half full
</span><span class="kw">pub async fn </span>write(<span
class="kw-2">&mut </span><span class="self">self</span>, batch: <span
class="kw-2">&</span>RecordBatch) -> <span
class="prelude-ty">Result</span><()> {
+ <span class="kw">let </span>before = <span
class="self">self</span>.sync_writer.flushed_row_groups().len();
<span class="self">self</span>.sync_writer.write(batch)<span
class="question-mark">?</span>;
- <span class="self">self</span>.try_flush(<span
class="bool-val">false</span>).<span class="kw">await
- </span>}
+ <span class="kw">if </span>before != <span
class="self">self</span>.sync_writer.flushed_row_groups().len() {
+ <span class="self">self</span>.do_write().<span
class="kw">await</span><span class="question-mark">?</span>;
+ }
+ <span class="prelude-val">Ok</span>(())
+ }
<span class="doccomment">/// Flushes all buffered rows into a new row group
</span><span class="kw">pub async fn </span>flush(<span
class="kw-2">&mut </span><span class="self">self</span>) -> <span
class="prelude-ty">Result</span><()> {
<span class="self">self</span>.sync_writer.flush()<span
class="question-mark">?</span>;
- <span class="self">self</span>.try_flush(<span
class="bool-val">false</span>).<span class="kw">await</span><span
class="question-mark">?</span>;
+ <span class="self">self</span>.do_write().<span
class="kw">await</span><span class="question-mark">?</span>;
<span class="prelude-val">Ok</span>(())
}
@@ -661,19 +570,15 @@
<span class="kw">let </span>metadata = <span
class="self">self</span>.sync_writer.finish()<span
class="question-mark">?</span>;
<span class="comment">// Force to flush the remaining data.
- </span><span class="self">self</span>.try_flush(<span
class="bool-val">true</span>).<span class="kw">await</span><span
class="question-mark">?</span>;
+ </span><span class="self">self</span>.do_write().<span
class="kw">await</span><span class="question-mark">?</span>;
<span class="self">self</span>.async_writer.shutdown().<span
class="kw">await</span><span class="question-mark">?</span>;
<span class="prelude-val">Ok</span>(metadata)
}
- <span class="doccomment">/// Flush the buffered data into the
`async_writer`
- </span><span class="kw">async fn </span>try_flush(<span
class="kw-2">&mut </span><span class="self">self</span>, force: bool) ->
<span class="prelude-ty">Result</span><()> {
+ <span class="doccomment">/// Flush the data written by `sync_writer` into
the `async_writer`
+ </span><span class="kw">async fn </span>do_write(<span
class="kw-2">&mut </span><span class="self">self</span>) -> <span
class="prelude-ty">Result</span><()> {
<span class="kw">let </span>buffer = <span
class="self">self</span>.sync_writer.inner_mut();
- <span class="kw">if </span>!force && (buffer.is_empty() ||
buffer.len() < <span class="self">self</span>.buffer_size) {
- <span class="comment">// no need to flush
- </span><span class="kw">return </span><span
class="prelude-val">Ok</span>(());
- }
<span class="self">self</span>.async_writer
.write_all(buffer.as_slice())
@@ -721,8 +626,7 @@
<span class="kw">let </span>to_write =
RecordBatch::try_from_iter([(<span class="string">"col"</span>, col)]).unwrap();
<span class="kw">let </span><span class="kw-2">mut </span>buffer =
Vec::new();
- <span class="kw">let </span><span class="kw-2">mut </span>writer =
- AsyncArrowWriter::try_new(<span class="kw-2">&mut
</span>buffer, to_write.schema(), <span class="number">0</span>, <span
class="prelude-val">None</span>).unwrap();
+ <span class="kw">let </span><span class="kw-2">mut </span>writer =
AsyncArrowWriter::try_new(<span class="kw-2">&mut </span>buffer,
to_write.schema(), <span class="prelude-val">None</span>).unwrap();
writer.write(<span class="kw-2">&</span>to_write).<span
class="kw">await</span>.unwrap();
writer.close().<span class="kw">await</span>.unwrap();
@@ -750,7 +654,6 @@
<span class="kw">let </span><span class="kw-2">mut </span>async_writer
= AsyncArrowWriter::try_new(
<span class="kw-2">&mut </span>async_buffer,
reader.schema(),
- <span class="number">1024</span>,
<span class="prelude-val">Some</span>(write_props.clone()),
)
.unwrap();
@@ -812,54 +715,6 @@
}
}
- <span class="attr">#[tokio::test]
- </span><span class="kw">async fn
</span>test_async_writer_with_buffer_flush_threshold() {
- <span class="kw">let </span>write_props = WriterProperties::builder()
- .set_max_row_group_size(<span class="number">2048</span>)
- .build();
- <span class="kw">let </span>expect_encode_size = {
- <span class="kw">let </span>reader = get_test_reader();
- <span class="kw">let </span><span class="kw-2">mut </span>buffer =
Vec::new();
- <span class="kw">let </span><span class="kw-2">mut
</span>async_writer = AsyncArrowWriter::try_new(
- <span class="kw-2">&mut </span>buffer,
- reader.schema(),
- <span class="number">0</span>,
- <span class="prelude-val">Some</span>(write_props.clone()),
- )
- .unwrap();
- <span class="kw">for </span>record_batch <span class="kw">in
</span>reader {
- <span class="kw">let </span>record_batch =
record_batch.unwrap();
- async_writer.write(<span
class="kw-2">&</span>record_batch).<span class="kw">await</span>.unwrap();
- }
- async_writer.close().<span class="kw">await</span>.unwrap();
- buffer.len()
- };
-
- <span class="kw">let </span>test_buffer_flush_thresholds = <span
class="macro">vec!</span>[<span class="number">0</span>, <span
class="number">1024</span>, <span class="number">40 </span>* <span
class="number">1024</span>, <span class="number">50 </span>* <span
class="number">1024</span>, <span class="number">100 </span>* <span
class="number">1024</span>];
-
- <span class="kw">for </span>buffer_flush_threshold <span class="kw">in
</span>test_buffer_flush_thresholds {
- <span class="kw">let </span>reader = get_test_reader();
- <span class="kw">let </span><span class="kw-2">mut
</span>test_async_sink = TestAsyncSink {
- sink: Vec::new(),
- min_accept_bytes: buffer_flush_threshold,
- expect_total_bytes: expect_encode_size,
- };
- <span class="kw">let </span><span class="kw-2">mut
</span>async_writer = AsyncArrowWriter::try_new(
- <span class="kw-2">&mut </span>test_async_sink,
- reader.schema(),
- buffer_flush_threshold * <span class="number">2</span>,
- <span class="prelude-val">Some</span>(write_props.clone()),
- )
- .unwrap();
-
- <span class="kw">for </span>record_batch <span class="kw">in
</span>reader {
- <span class="kw">let </span>record_batch =
record_batch.unwrap();
- async_writer.write(<span
class="kw-2">&</span>record_batch).<span class="kw">await</span>.unwrap();
- }
- async_writer.close().<span class="kw">await</span>.unwrap();
- }
- }
-
<span class="attr">#[tokio::test]
</span><span class="kw">async fn </span>test_async_writer_file() {
<span class="kw">let </span>col =
Arc::new(Int64Array::from_iter_values([<span class="number">1</span>, <span
class="number">2</span>, <span class="number">3</span>])) <span class="kw">as
</span>ArrayRef;
@@ -873,7 +728,7 @@
<span class="kw">let </span>temp = tempfile::tempfile().unwrap();
<span class="kw">let </span>file =
tokio::fs::File::from_std(temp.try_clone().unwrap());
- <span class="kw">let </span><span class="kw-2">mut </span>writer =
AsyncArrowWriter::try_new(file, to_write.schema(), <span
class="number">0</span>, <span class="prelude-val">None</span>).unwrap();
+ <span class="kw">let </span><span class="kw-2">mut </span>writer =
AsyncArrowWriter::try_new(file, to_write.schema(), <span
class="prelude-val">None</span>).unwrap();
writer.write(<span class="kw-2">&</span>to_write).<span
class="kw">await</span>.unwrap();
writer.close().<span class="kw">await</span>.unwrap();
@@ -897,37 +752,34 @@
<span class="comment">// build a record batch
</span><span class="kw">let </span>batch =
RecordBatch::try_new(Arc::new(schema), <span
class="macro">vec!</span>[Arc::new(a)]).unwrap();
- <span class="kw">for </span>buffer_size <span class="kw">in
</span>[<span class="number">0</span>, <span class="number">8</span>, <span
class="number">1024</span>] {
- <span class="kw">let </span>temp = tempfile::tempfile().unwrap();
- <span class="kw">let </span>file =
tokio::fs::File::from_std(temp.try_clone().unwrap());
- <span class="kw">let </span><span class="kw-2">mut </span>writer =
- AsyncArrowWriter::try_new(file, batch.schema(), buffer_size,
<span class="prelude-val">None</span>).unwrap();
-
- <span class="comment">// starts empty
- </span><span
class="macro">assert_eq!</span>(writer.in_progress_size(), <span
class="number">0</span>);
- <span class="macro">assert_eq!</span>(writer.in_progress_rows(),
<span class="number">0</span>);
- <span class="macro">assert_eq!</span>(writer.bytes_written(),
<span class="number">4</span>); <span class="comment">// Initial Parquet header
- </span>writer.write(<span class="kw-2">&</span>batch).<span
class="kw">await</span>.unwrap();
-
- <span class="comment">// updated on write
- </span><span class="kw">let </span>initial_size =
writer.in_progress_size();
- <span class="macro">assert!</span>(initial_size > <span
class="number">0</span>);
- <span class="macro">assert_eq!</span>(writer.in_progress_rows(),
batch.num_rows());
-
- <span class="comment">// updated on second write
- </span>writer.write(<span class="kw-2">&</span>batch).<span
class="kw">await</span>.unwrap();
- <span class="macro">assert!</span>(writer.in_progress_size() >
initial_size);
- <span class="macro">assert_eq!</span>(writer.in_progress_rows(),
batch.num_rows() * <span class="number">2</span>);
-
- <span class="comment">// in progress tracking is cleared, but the
overall data written is updated
- </span><span class="kw">let </span>pre_flush_bytes_written =
writer.bytes_written();
- writer.flush().<span class="kw">await</span>.unwrap();
- <span class="macro">assert_eq!</span>(writer.in_progress_size(),
<span class="number">0</span>);
- <span class="macro">assert_eq!</span>(writer.in_progress_rows(),
<span class="number">0</span>);
- <span class="macro">assert!</span>(writer.bytes_written() >
pre_flush_bytes_written);
-
- writer.close().<span class="kw">await</span>.unwrap();
- }
+ <span class="kw">let </span>temp = tempfile::tempfile().unwrap();
+ <span class="kw">let </span>file =
tokio::fs::File::from_std(temp.try_clone().unwrap());
+ <span class="kw">let </span><span class="kw-2">mut </span>writer =
AsyncArrowWriter::try_new(file, batch.schema(), <span
class="prelude-val">None</span>).unwrap();
+
+ <span class="comment">// starts empty
+ </span><span
class="macro">assert_eq!</span>(writer.in_progress_size(), <span
class="number">0</span>);
+ <span class="macro">assert_eq!</span>(writer.in_progress_rows(), <span
class="number">0</span>);
+ <span class="macro">assert_eq!</span>(writer.bytes_written(), <span
class="number">4</span>); <span class="comment">// Initial Parquet header
+ </span>writer.write(<span class="kw-2">&</span>batch).<span
class="kw">await</span>.unwrap();
+
+ <span class="comment">// updated on write
+ </span><span class="kw">let </span>initial_size =
writer.in_progress_size();
+ <span class="macro">assert!</span>(initial_size > <span
class="number">0</span>);
+ <span class="macro">assert_eq!</span>(writer.in_progress_rows(),
batch.num_rows());
+
+ <span class="comment">// updated on second write
+ </span>writer.write(<span class="kw-2">&</span>batch).<span
class="kw">await</span>.unwrap();
+ <span class="macro">assert!</span>(writer.in_progress_size() >
initial_size);
+ <span class="macro">assert_eq!</span>(writer.in_progress_rows(),
batch.num_rows() * <span class="number">2</span>);
+
+ <span class="comment">// in progress tracking is cleared, but the
overall data written is updated
+ </span><span class="kw">let </span>pre_flush_bytes_written =
writer.bytes_written();
+ writer.flush().<span class="kw">await</span>.unwrap();
+ <span class="macro">assert_eq!</span>(writer.in_progress_size(), <span
class="number">0</span>);
+ <span class="macro">assert_eq!</span>(writer.in_progress_rows(), <span
class="number">0</span>);
+ <span class="macro">assert!</span>(writer.bytes_written() >
pre_flush_bytes_written);
+
+ writer.close().<span class="kw">await</span>.unwrap();
}
}
</code></pre></div></section></main></body></html>
\ No newline at end of file