This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-ballista.git
The following commit(s) were added to refs/heads/main by this push:
new 36ba83ec Code cleanup, move examples, remove unused files (#1075)
36ba83ec is described below
commit 36ba83ec269cee28dd3701605249159af8718ebd
Author: Marko Milenković <[email protected]>
AuthorDate: Sat Oct 12 00:30:14 2024 +0100
Code cleanup, move examples, remove unused files (#1075)
Relates to: #1066 & #1067
---
.readthedocs.yml | 19 ---
ballista/client/src/columnar_batch.rs | 165 ---------------------
ballista/client/src/lib.rs | 1 -
.../dataframe.rs => examples/remote-dataframe.rs} | 0
.../{src/bin/sql.rs => examples/remote-sql.rs} | 0
5 files changed, 185 deletions(-)
diff --git a/.readthedocs.yml b/.readthedocs.yml
deleted file mode 100644
index 11a7d70c..00000000
--- a/.readthedocs.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-conda:
- file: python/doc/environment.yml
diff --git a/ballista/client/src/columnar_batch.rs b/ballista/client/src/columnar_batch.rs
deleted file mode 100644
index 5e7fe89b..00000000
--- a/ballista/client/src/columnar_batch.rs
+++ /dev/null
@@ -1,165 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{collections::HashMap, sync::Arc};
-
-use ballista_core::error::{ballista_error, Result};
-
-use datafusion::arrow::{
- array::ArrayRef,
- datatypes::{DataType, Schema},
- record_batch::RecordBatch,
-};
-use datafusion::scalar::ScalarValue;
-
-pub type MaybeColumnarBatch = Result<Option<ColumnarBatch>>;
-
-/// Batch of columnar data.
-#[derive(Debug, Clone)]
-pub struct ColumnarBatch {
- schema: Arc<Schema>,
- columns: HashMap<String, ColumnarValue>,
-}
-
-impl ColumnarBatch {
- pub fn from_arrow(batch: &RecordBatch) -> Self {
- let columns = batch
- .columns()
- .iter()
- .enumerate()
- .map(|(i, array)| {
- (
- batch.schema().field(i).name().clone(),
- ColumnarValue::Columnar(array.clone()),
- )
- })
- .collect();
-
- Self {
- schema: batch.schema(),
- columns,
- }
- }
-
- pub fn from_values(values: &[ColumnarValue], schema: &Schema) -> Self {
- let columns = schema
- .fields()
- .iter()
- .enumerate()
- .map(|(i, f)| (f.name().clone(), values[i].clone()))
- .collect();
-
- Self {
- schema: Arc::new(schema.clone()),
- columns,
- }
- }
-
- pub fn to_arrow(&self) -> Result<RecordBatch> {
- let arrays = self
- .schema
- .fields()
- .iter()
- .map(|c| {
- match self.column(c.name())? {
- ColumnarValue::Columnar(array) => Ok(array.clone()),
- ColumnarValue::Scalar(_, _) => {
- // note that this can be implemented easily if needed
- Err(ballista_error("Cannot convert scalar value to Arrow array"))
- }
- }
- })
- .collect::<Result<Vec<_>>>()?;
-
- Ok(RecordBatch::try_new(self.schema.clone(), arrays)?)
- }
-
- pub fn schema(&self) -> Arc<Schema> {
- self.schema.clone()
- }
-
- pub fn num_columns(&self) -> usize {
- self.columns.len()
- }
-
- pub fn num_rows(&self) -> usize {
- self.columns[self.schema.field(0).name()].len()
- }
-
- pub fn column(&self, name: &str) -> Result<&ColumnarValue> {
- Ok(&self.columns[name])
- }
-
- pub fn memory_size(&self) -> usize {
- self.columns.values().map(|c| c.memory_size()).sum()
- }
-}
-
-/// A columnar value can either be a scalar value or an Arrow array.
-#[derive(Debug, Clone)]
-pub enum ColumnarValue {
- Scalar(ScalarValue, usize),
- Columnar(ArrayRef),
-}
-
-impl ColumnarValue {
- pub fn len(&self) -> usize {
- match self {
- ColumnarValue::Scalar(_, n) => *n,
- ColumnarValue::Columnar(array) => array.len(),
- }
- }
-
- pub fn is_empty(&self) -> bool {
- self.len() == 0
- }
-
- pub fn data_type(&self) -> &DataType {
- match self {
- ColumnarValue::Columnar(array) => array.data_type(),
- ColumnarValue::Scalar(value, _) => match value {
- ScalarValue::UInt8(_) => &DataType::UInt8,
- ScalarValue::UInt16(_) => &DataType::UInt16,
- ScalarValue::UInt32(_) => &DataType::UInt32,
- ScalarValue::UInt64(_) => &DataType::UInt64,
- ScalarValue::Int8(_) => &DataType::Int8,
- ScalarValue::Int16(_) => &DataType::Int16,
- ScalarValue::Int32(_) => &DataType::Int32,
- ScalarValue::Int64(_) => &DataType::Int64,
- ScalarValue::Float32(_) => &DataType::Float32,
- ScalarValue::Float64(_) => &DataType::Float64,
- _ => unimplemented!(),
- },
- }
- }
-
- pub fn to_arrow(&self) -> Result<ArrayRef> {
- match self {
- ColumnarValue::Columnar(array) => Ok(array.clone()),
- ColumnarValue::Scalar(value, n) => {
- value.to_array_of_size(*n).map_err(|x| x.into())
- }
- }
- }
-
- pub fn memory_size(&self) -> usize {
- match self {
- ColumnarValue::Columnar(array) => array.get_array_memory_size(),
- _ => 0,
- }
- }
-}
diff --git a/ballista/client/src/lib.rs b/ballista/client/src/lib.rs
index 125278dc..e61dfef2 100644
--- a/ballista/client/src/lib.rs
+++ b/ballista/client/src/lib.rs
@@ -17,6 +17,5 @@
#![doc = include_str!("../README.md")]
-pub mod columnar_batch;
pub mod context;
pub mod prelude;
diff --git a/examples/src/bin/dataframe.rs b/examples/examples/remote-dataframe.rs
similarity index 100%
rename from examples/src/bin/dataframe.rs
rename to examples/examples/remote-dataframe.rs
diff --git a/examples/src/bin/sql.rs b/examples/examples/remote-sql.rs
similarity index 100%
rename from examples/src/bin/sql.rs
rename to examples/examples/remote-sql.rs
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]