tustvold commented on code in PR #9613:
URL: https://github.com/apache/arrow-datafusion/pull/9613#discussion_r1529003016
##########
datafusion-examples/examples/deserialize_to_struct.rs:
##########
@@ -15,61 +15,61 @@
// specific language governing permissions and limitations
// under the License.
+use arrow::array::AsArray;
+use arrow::datatypes::{Float64Type, Int32Type};
use datafusion::error::Result;
use datafusion::prelude::*;
-use serde::Deserialize;
+use futures::StreamExt;
/// This example shows that it is possible to convert query results into Rust
structs .
-/// It will collect the query results into RecordBatch, then convert it to
serde_json::Value.
-/// Then, serde_json::Value is turned into Rust's struct.
-/// Any datatype with `Deserialize` implemeneted works.
#[tokio::main]
async fn main() -> Result<()> {
let data_list = Data::new().await?;
println!("{data_list:#?}");
Ok(())
}
-#[derive(Deserialize, Debug)]
+#[derive(Debug)]
struct Data {
#[allow(dead_code)]
- int_col: i64,
+ int_col: i32,
#[allow(dead_code)]
double_col: f64,
}
impl Data {
pub async fn new() -> Result<Vec<Self>> {
// this group is almost the same as the one you find it in
parquet_sql.rs
- let batches = {
- let ctx = SessionContext::new();
+ let ctx = SessionContext::new();
- let testdata = datafusion::test_util::parquet_test_data();
+ let testdata = datafusion::test_util::parquet_test_data();
- ctx.register_parquet(
- "alltypes_plain",
- &format!("{testdata}/alltypes_plain.parquet"),
- ParquetReadOptions::default(),
- )
- .await?;
+ ctx.register_parquet(
+ "alltypes_plain",
+ &format!("{testdata}/alltypes_plain.parquet"),
+ ParquetReadOptions::default(),
+ )
+ .await?;
- let df = ctx
- .sql("SELECT int_col, double_col FROM alltypes_plain")
- .await?;
+ let df = ctx
+ .sql("SELECT int_col, double_col FROM alltypes_plain")
+ .await?;
- df.clone().show().await?;
+ df.clone().show().await?;
- df.collect().await?
- };
- let batches: Vec<_> = batches.iter().collect();
+ let mut stream = df.execute_stream().await?;
+ let mut list = vec![];
+ while let Some(b) = stream.next().await.transpose()? {
+ let int_col = b.column(0).as_primitive::<Int32Type>();
+ let float_col = b.column(1).as_primitive::<Float64Type>();
- // converts it to serde_json type and then convert that into Rust type
Review Comment:
You can serialize to JSON and parse it, but I would rather encourage people
towards the performant way of doing things.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]