This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/master by this push:
     new 7c751de  Add minimal crate documentation for Ballista crates (#831)
7c751de is described below

commit 7c751de70076b6a218fccb9bb6d32e65c1cfeb34
Author: Andy Grove <[email protected]>
AuthorDate: Sat Aug 7 14:13:12 2021 -0600

    Add minimal crate documentation for Ballista crates (#831)
---
 ballista/rust/client/src/lib.rs    | 98 ++++++++++++++++++++++++++++++++++++++
 ballista/rust/core/src/lib.rs      |  7 ++-
 ballista/rust/executor/src/lib.rs  |  5 +-
 ballista/rust/scheduler/src/lib.rs |  5 +-
 4 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/ballista/rust/client/src/lib.rs b/ballista/rust/client/src/lib.rs
index c3c6291..35bd12b 100644
--- a/ballista/rust/client/src/lib.rs
+++ b/ballista/rust/client/src/lib.rs
@@ -15,6 +15,104 @@
 // specific language governing permissions and limitations
 // under the License.
 
+//! Ballista is a distributed compute platform primarily implemented in Rust, and powered by Apache Arrow and
+//! DataFusion. It is built on an architecture that allows other programming languages (such as Python, C++, and
+//! Java) to be supported as first-class citizens without paying a penalty for serialization costs.
+//!
+//! The foundational technologies in Ballista are:
+//!
+//! - [Apache Arrow](https://arrow.apache.org/) memory model and compute kernels for efficient processing of data.
+//! - [Apache Arrow Flight Protocol](https://arrow.apache.org/blog/2019/10/13/introducing-arrow-flight/) for efficient
+//!   data transfer between processes.
+//! - [Google Protocol Buffers](https://developers.google.com/protocol-buffers) for serializing query plans.
+//! - [Docker](https://www.docker.com/) for packaging up executors along with user-defined code.
+//!
+//! Ballista can be deployed as a standalone cluster and also supports [Kubernetes](https://kubernetes.io/). In either
+//! case, the scheduler can be configured to use [etcd](https://etcd.io/) as a backing store to (eventually) provide
+//! redundancy in the case of a scheduler failing.
+//!
+//! ## Starting a cluster
+//!
+//! There are numerous ways to start a Ballista cluster, including support for 
Docker and
+//! Kubernetes. For full documentation, refer to the
+//! [DataFusion User Guide](https://github.com/apache/arrow-datafusion/tree/master/docs/user-guide).
+//!
+//! A simple way to start a local cluster for testing purposes is to use cargo 
to install
+//! the scheduler and executor crates.
+//!
+//! ```bash
+//! cargo install ballista-scheduler
+//! cargo install ballista-executor
+//! ```
+//!
+//! With these crates installed, it is now possible to start a scheduler 
process.
+//!
+//! ```bash
+//! RUST_LOG=info ballista-scheduler
+//! ```
+//!
+//! The scheduler will bind to port 50050 by default.
+//!
+//! Next, start an executor process in a new terminal session with the specified concurrency
+//! level.
+//!
+//! ```bash
+//! RUST_LOG=info ballista-executor -c 4
+//! ```
+//!
+//! The executor will bind to port 50051 by default. Additional executors can 
be started by
+//! manually specifying a bind port. For example:
+//!
+//! ```bash
+//! RUST_LOG=info ballista-executor --bind-port 50052 -c 4
+//! ```
+//!
+//! ## Executing a query
+//!
+//! Ballista provides a `BallistaContext` as a starting point for creating 
queries. DataFrames can be created
+//! by invoking the `read_csv`, `read_parquet`, and `sql` methods.
+//!
+//! The following example runs a simple aggregate SQL query against a CSV file 
from the
+//! [New York Taxi and Limousine 
Commission](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page)
+//! data set.
+//!
+//! ```no_run
+//! use ballista::prelude::*;
+//! use datafusion::arrow::util::pretty;
+//! use datafusion::prelude::CsvReadOptions;
+//!
+//! #[tokio::main]
+//! async fn main() -> Result<()> {
+//!    // create configuration
+//!    let config = BallistaConfig::builder()
+//!        .set("ballista.shuffle.partitions", "4")
+//!        .build()?;
+//!
+//!    // connect to Ballista scheduler
+//!    let ctx = BallistaContext::remote("localhost", 50050, &config);
+//!
+//!    // register csv file with the execution context
+//!    ctx.register_csv(
+//!        "tripdata",
+//!        "/path/to/yellow_tripdata_2020-01.csv",
+//!        CsvReadOptions::new(),
+//!    )?;
+//!
+//!    // execute the query
+//!    let df = ctx.sql(
+//!        "SELECT passenger_count, MIN(fare_amount), MAX(fare_amount), AVG(fare_amount), SUM(fare_amount)
+//!        FROM tripdata
+//!        GROUP BY passenger_count
+//!        ORDER BY passenger_count",
+//!    )?;
+//!
+//!    // collect the results and print them to stdout
+//!    let results = df.collect().await?;
+//!    pretty::print_batches(&results)?;
+//!    Ok(())
+//! }
+//! ```
+
 pub mod columnar_batch;
 pub mod context;
 pub mod prelude;
diff --git a/ballista/rust/core/src/lib.rs b/ballista/rust/core/src/lib.rs
index 2a84869..614bf9a 100644
--- a/ballista/rust/core/src/lib.rs
+++ b/ballista/rust/core/src/lib.rs
@@ -15,7 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Ballista Distributed Compute
+//! Ballista Core Library
+//!
+//! This crate contains the Ballista core library which is used as a dependency by the ballista,
+//! ballista-scheduler, and ballista-executor crates. Refer to <https://crates.io/crates/ballista> for
+//! general Ballista documentation.
+
 #![allow(unused_imports)]
 pub const BALLISTA_VERSION: &str = env!("CARGO_PKG_VERSION");
 
diff --git a/ballista/rust/executor/src/lib.rs 
b/ballista/rust/executor/src/lib.rs
index f3ab7dc..f2abf31 100644
--- a/ballista/rust/executor/src/lib.rs
+++ b/ballista/rust/executor/src/lib.rs
@@ -15,7 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Core executor logic for executing queries and storing results in memory.
+//! Ballista Executor Process
+//!
+//! This crate contains the Ballista executor process. Refer to <https://crates.io/crates/ballista> for
+//! documentation.
 
 pub mod collect;
 pub mod execution_loop;
diff --git a/ballista/rust/scheduler/src/lib.rs 
b/ballista/rust/scheduler/src/lib.rs
index 3e4e735..676975f 100644
--- a/ballista/rust/scheduler/src/lib.rs
+++ b/ballista/rust/scheduler/src/lib.rs
@@ -15,7 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! Support for distributed schedulers, such as Kubernetes
+//! Ballista Scheduler Process
+//!
+//! This crate contains the Ballista scheduler process. Refer to <https://crates.io/crates/ballista> for
+//! documentation.
 
 pub mod api;
 pub mod planner;

Reply via email to