This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 3039633d5 Deprecate flight_data_from_arrow_batch (#3353)
3039633d5 is described below
commit 3039633d57b78d20fc4d9ae9165c0d0777e81bb7
Author: Daniël Heres <[email protected]>
AuthorDate: Fri Dec 16 14:55:19 2022 +0100
Deprecate flight_data_from_arrow_batch (#3353)
* Deprecate flight_data_from_arrow_batch
* Add docs
* Re-implement
* Del
* fmt
Co-authored-by: Daniël Heres <[email protected]>
---
arrow-flight/src/utils.rs | 17 +++++++++++++----
arrow-ipc/src/writer.rs | 3 +++
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/arrow-flight/src/utils.rs b/arrow-flight/src/utils.rs
index 392d41c83..855b33385 100644
--- a/arrow-flight/src/utils.rs
+++ b/arrow-flight/src/utils.rs
@@ -29,6 +29,10 @@ use arrow_schema::{ArrowError, Schema, SchemaRef};
/// Convert a `RecordBatch` to a vector of `FlightData` representing the bytes of the dictionaries
/// and a `FlightData` representing the bytes of the batch's values
+#[deprecated(
+ since = "30.0.0",
+ note = "Use IpcDataGenerator directly with DictionaryTracker to avoid re-sending dictionaries"
+)]
pub fn flight_data_from_arrow_batch(
batch: &RecordBatch,
options: &IpcWriteOptions,
@@ -149,11 +153,16 @@ pub fn batches_to_flight_data(
let schema_flight_data: FlightData = SchemaAsIpc::new(&schema, &options).into();
let mut dictionaries = vec![];
let mut flight_data = vec![];
+
+ let data_gen = writer::IpcDataGenerator::default();
+ let mut dictionary_tracker = writer::DictionaryTracker::new(false);
+
for batch in batches.iter() {
- let (flight_dictionaries, flight_datum) =
- flight_data_from_arrow_batch(batch, &options);
- dictionaries.extend(flight_dictionaries);
- flight_data.push(flight_datum);
+ let (encoded_dictionaries, encoded_batch) =
+ data_gen.encoded_batch(batch, &mut dictionary_tracker, &options)?;
+
+ dictionaries.extend(encoded_dictionaries.into_iter().map(Into::into));
+ flight_data.push(encoded_batch.into());
}
let mut stream = vec![schema_flight_data];
stream.extend(dictionaries.into_iter());
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index 006660b6a..106b4e4c9 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -316,6 +316,9 @@ impl IpcDataGenerator {
Ok(())
}
+ /// Encodes a batch to a number of [EncodedData] items (dictionary batches + the record batch).
+ /// The [DictionaryTracker] keeps track of dictionaries with new `dict_id`s (so they are only sent once)
+ /// Make sure the [DictionaryTracker] is initialized at the start of the stream.
pub fn encoded_batch(
&self,
batch: &RecordBatch,