GitHub user yuyang-ok closed a discussion: why datafusion consume 2g of
memory??
~~~rust
use datafusion::{
arrow::{
array::{Int32Array, RecordBatch, StringArray},
datatypes::{DataType, Field, Schema},
},
catalog::TableProvider,
datasource::MemTable,
prelude::SessionContext,
};
use rand::Rng;
use std::{sync::Arc, time::Duration};
/// Reproduction program: registers two Parquet locations as the tables `aaa`
/// and `bbb`, prints the row count of each, then spawns a task that prints the
/// row count of their join on `geom`, and finally keeps the process alive.
///
/// # Panics
///
/// Every `Result` is unwrapped, so any failure while registering a table,
/// building a query, or showing its result panics.
#[tokio::main]
async fn main() {
    let ctx = SessionContext::new();

    // Both tables are registered before any query runs, `aaa` first.
    let tables = [
        (
            "aaa",
            "/opt/datafabric/cache/view_common_v1/datafusion/public/test_cce/76af01f5-2f3e-406f-b9cc-9b5cb48eaf42",
        ),
        (
            "bbb",
            "/opt/datafabric/cache/view_common_v1/datafusion/public/test_3b8/48af93f4-34f3-496c-a709-ffaa1f475e9e",
        ),
    ];
    for (table_name, location) in tables {
        ctx.register_parquet(table_name, location, Default::default())
            .await
            .unwrap();
    }

    // Print the per-table row counts, in the same order as registration.
    let count_queries = ["select count(*) from aaa", "select count(*) from bbb"];
    for query in count_queries {
        let frame = ctx.sql(query).await.unwrap();
        frame.show().await.unwrap();
    }

    // Run the join on a spawned task; the returned handle is intentionally
    // not awaited, so the task is detached.
    for _ in 0..1 {
        let task_ctx = ctx.clone();
        tokio::spawn(async move {
            let joined = task_ctx
                .sql("select count(*) from aaa,bbb where aaa.geom = bbb.geom ")
                .await
                .unwrap();
            joined.show().await.unwrap();
        });
    }

    // Never return: sleep in one-second steps so the process stays alive.
    let tick = Duration::from_secs(1);
    loop {
        tokio::time::sleep(tick).await;
    }
}
~~~
~~~
[package]
name = "abc-rust"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
[dependencies]
anyhow = "1.0.86"
serde = "1.0.204"
serde_json = "1.0.120"
# sqlx = { version = "0.7.4", features = ["runtime-tokio", "sqlite"] }
tokio = { version = "1.38.0", features = ["full"] }
# futures = { version = "*" }
# futures-util = { version = "*" }
datafusion = { version = "41.0", features = ["serde", "parquet"] }
rand = "0.8.5"
[profile.dev]
~~~
GitHub link: https://github.com/apache/datafusion/discussions/12386
----
This is an automatically sent email for [email protected].
To unsubscribe, please send an email to:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]