This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new b1d134e9ff Add HTTP object store example (#7602)
b1d134e9ff is described below
commit b1d134e9ff37724459cb5090a6346a85152a1db7
Author: Pirmin Kalberer <[email protected]>
AuthorDate: Mon Sep 25 22:43:45 2023 +0200
Add HTTP object store example (#7602)
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion-examples/Cargo.toml | 2 +-
datafusion-examples/README.md | 1 +
datafusion-examples/examples/query-http-csv.rs | 57 ++++++++++++++++++++++++++
3 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml
index d928f0177d..e5146c7fd9 100644
--- a/datafusion-examples/Cargo.toml
+++ b/datafusion-examples/Cargo.toml
@@ -46,7 +46,7 @@ futures = "0.3"
log = "0.4"
mimalloc = { version = "0.1", default-features = false }
num_cpus = "1.13.0"
-object_store = { version = "0.7.0", features = ["aws"] }
+object_store = { version = "0.7.0", features = ["aws", "http"] }
prost = { version = "0.12", default-features = false }
prost-derive = { version = "0.11", default-features = false }
serde = { version = "1.0.136", features = ["derive"] }
diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md
index 02dd9c4173..bfed3976c9 100644
--- a/datafusion-examples/README.md
+++ b/datafusion-examples/README.md
@@ -54,6 +54,7 @@ cargo run --example csv_sql
- [`parquet_sql.rs`](examples/parquet_sql.rs): Build and run a query plan from
a SQL statement against a local Parquet file
- [`parquet_sql_multiple_files.rs`](examples/parquet_sql_multiple_files.rs):
Build and run a query plan from a SQL statement against multiple local Parquet
files
- [`query-aws-s3.rs`](examples/query-aws-s3.rs): Configure `object_store` and
run a query against files stored in AWS S3
+- [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store`
and run a query against files via HTTP
- [`rewrite_expr.rs`](examples/rewrite_expr.rs): Define and invoke a custom
Query Optimizer pass
- [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User
Defined Aggregate Function (UDAF)
- [`simple_udf.rs`](examples/simple_udf.rs): Define and invoke a User Defined
(scalar) Function (UDF)
diff --git a/datafusion-examples/examples/query-http-csv.rs
b/datafusion-examples/examples/query-http-csv.rs
new file mode 100644
index 0000000000..928d702711
--- /dev/null
+++ b/datafusion-examples/examples/query-http-csv.rs
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::error::Result;
+use datafusion::prelude::*;
+use object_store::http::HttpBuilder;
+use std::sync::Arc;
+use url::Url;
+
+/// This example demonstrates registering an HTTP `object_store` with a
+/// `SessionContext` and running a SQL query against a remote CSV file
+#[tokio::main]
+async fn main() -> Result<()> {
+    // create local execution context
+    let ctx = SessionContext::new();
+
+    // setup http object store; the base URL is a constant, so failing to
+    // parse it is a programming error, whereas a failure to build the store
+    // is propagated to the caller like every other error in this example
+    let base_url = Url::parse("https://github.com").expect("valid base URL");
+    let http_store = HttpBuilder::new().with_url(base_url.clone()).build()?;
+    // register the store under the same URL the CSV path below starts with
+    ctx.runtime_env()
+        .register_object_store(&base_url, Arc::new(http_store));
+
+    // register csv file with the execution context
+    ctx.register_csv(
+        "aggregate_test_100",
+        "https://github.com/apache/arrow-testing/raw/master/data/csv/aggregate_test_100.csv",
+        CsvReadOptions::new(),
+    )
+    .await?;
+
+    // execute the query
+    let df = ctx
+        .sql("SELECT c1,c2,c3 FROM aggregate_test_100 LIMIT 5")
+        .await?;
+
+    // print the results
+    df.show().await?;
+
+    Ok(())
+}