This is an automated email from the ASF dual-hosted git repository.
mete pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new fd121d3e29 Add examples of DataFrame::write* methods without S3
dependency (#8606)
fd121d3e29 is described below
commit fd121d3e29404a243a3c18c67c40fa7132ed9ed2
Author: Devin D'Angelo <[email protected]>
AuthorDate: Fri Dec 22 02:00:25 2023 -0500
Add examples of DataFrame::write* methods without S3 dependency (#8606)
---
datafusion-examples/README.md | 3 +-
datafusion-examples/examples/dataframe_output.rs | 76 ++++++++++++++++++++++++
2 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md
index 305422ccd0..057cdd4752 100644
--- a/datafusion-examples/README.md
+++ b/datafusion-examples/README.md
@@ -47,7 +47,8 @@ cargo run --example csv_sql
- [`catalog.rs`](examples/external_dependency/catalog.rs): Register the table
into a custom catalog
- [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against
a custom datasource (TableProvider)
- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame
against a local parquet file
-- [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run
a query using a DataFrame against a parquet file from s3
+- [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and write the results back to s3
+- [`dataframe_output.rs`](examples/dataframe_output.rs): Examples of methods
which write data out from a DataFrame
- [`dataframe_in_memory.rs`](examples/dataframe_in_memory.rs): Run a query
using a DataFrame against data in memory
- [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert
query results into rust structs using serde
- [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify and anaylze
`Expr`s
diff --git a/datafusion-examples/examples/dataframe_output.rs b/datafusion-examples/examples/dataframe_output.rs
new file mode 100644
index 0000000000..c773384dfc
--- /dev/null
+++ b/datafusion-examples/examples/dataframe_output.rs
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::{dataframe::DataFrameWriteOptions, prelude::*};
+use datafusion_common::{parsers::CompressionTypeVariant, DataFusionError};
+
+/// This example demonstrates the various methods to write out a DataFrame to local storage.
+/// See datafusion-examples/examples/external_dependency/dataframe-to-s3.rs for an example
+/// using a remote object store. Any failing step is returned as a `DataFusionError`.
+#[tokio::main]
+async fn main() -> Result<(), DataFusionError> {
+    let ctx = SessionContext::new();
+
+    let mut df = ctx.sql("values ('a'), ('b'), ('c')").await?;
+
+    // Ensure the column names and types match the target table
+    df = df.with_column_renamed("column1", "tablecol1")?;
+
+    ctx.sql(
+        "create external table
+    test(tablecol1 varchar)
+    stored as parquet
+    location './datafusion-examples/test_table/'",
+    )
+    .await?
+    .collect()
+    .await?;
+
+    // This is equivalent to INSERT INTO test VALUES ('a'), ('b'), ('c').
+    // The behavior of write_table depends on the TableProvider's implementation
+    // of the insert_into method.
+    df.clone()
+        .write_table("test", DataFrameWriteOptions::new())
+        .await?;
+
+    df.clone()
+        .write_parquet(
+            "./datafusion-examples/test_parquet/",
+            DataFrameWriteOptions::new(),
+            None,
+        )
+        .await?;
+
+    df.clone()
+        .write_csv(
+            "./datafusion-examples/test_csv/",
+            // DataFrameWriteOptions contains options which control how data is written
+            // such as compression codec
+            DataFrameWriteOptions::new().with_compression(CompressionTypeVariant::GZIP),
+            None,
+        )
+        .await?;
+
+    df.clone()
+        .write_json(
+            "./datafusion-examples/test_json/",
+            DataFrameWriteOptions::new(),
+        )
+        .await?;
+
+    Ok(())
+}