(datafusion-ballista) branch main updated: Code cleanup, move examples, remove unused files (#1075)

agrove Fri, 11 Oct 2024 16:31:44 -0700

This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-ballista.git



The following commit(s) were added to refs/heads/main by this push:
     new 36ba83ec Code cleanup, move examples, remove unused files (#1075)
36ba83ec is described below

commit 36ba83ec269cee28dd3701605249159af8718ebd
Author: Marko Milenković <[email protected]>
AuthorDate: Sat Oct 12 00:30:14 2024 +0100

    Code cleanup, move examples, remove unused files (#1075)
    
    Relates to: #1066 & #1067
---
 .readthedocs.yml                                   |  19 ---
 ballista/client/src/columnar_batch.rs              | 165 ---------------------
 ballista/client/src/lib.rs                         |   1 -
 .../dataframe.rs => examples/remote-dataframe.rs}  |   0
 .../{src/bin/sql.rs => examples/remote-sql.rs}     |   0
 5 files changed, 185 deletions(-)

diff --git a/.readthedocs.yml b/.readthedocs.yml
deleted file mode 100644
index 11a7d70c..00000000
--- a/.readthedocs.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-conda:
-    file: python/doc/environment.yml
diff --git a/ballista/client/src/columnar_batch.rs b/ballista/client/src/columnar_batch.rs
deleted file mode 100644
index 5e7fe89b..00000000
--- a/ballista/client/src/columnar_batch.rs
+++ /dev/null
@@ -1,165 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use std::{collections::HashMap, sync::Arc};
-
-use ballista_core::error::{ballista_error, Result};
-
-use datafusion::arrow::{
-    array::ArrayRef,
-    datatypes::{DataType, Schema},
-    record_batch::RecordBatch,
-};
-use datafusion::scalar::ScalarValue;
-
-pub type MaybeColumnarBatch = Result<Option<ColumnarBatch>>;
-
-/// Batch of columnar data.
-#[derive(Debug, Clone)]
-pub struct ColumnarBatch {
-    schema: Arc<Schema>,
-    columns: HashMap<String, ColumnarValue>,
-}
-
-impl ColumnarBatch {
-    pub fn from_arrow(batch: &RecordBatch) -> Self {
-        let columns = batch
-            .columns()
-            .iter()
-            .enumerate()
-            .map(|(i, array)| {
-                (
-                    batch.schema().field(i).name().clone(),
-                    ColumnarValue::Columnar(array.clone()),
-                )
-            })
-            .collect();
-
-        Self {
-            schema: batch.schema(),
-            columns,
-        }
-    }
-
-    pub fn from_values(values: &[ColumnarValue], schema: &Schema) -> Self {
-        let columns = schema
-            .fields()
-            .iter()
-            .enumerate()
-            .map(|(i, f)| (f.name().clone(), values[i].clone()))
-            .collect();
-
-        Self {
-            schema: Arc::new(schema.clone()),
-            columns,
-        }
-    }
-
-    pub fn to_arrow(&self) -> Result<RecordBatch> {
-        let arrays = self
-            .schema
-            .fields()
-            .iter()
-            .map(|c| {
-                match self.column(c.name())? {
-                    ColumnarValue::Columnar(array) => Ok(array.clone()),
-                    ColumnarValue::Scalar(_, _) => {
-                        // note that this can be implemented easily if needed
-                        Err(ballista_error("Cannot convert scalar value to Arrow array"))
-                    }
-                }
-            })
-            .collect::<Result<Vec<_>>>()?;
-
-        Ok(RecordBatch::try_new(self.schema.clone(), arrays)?)
-    }
-
-    pub fn schema(&self) -> Arc<Schema> {
-        self.schema.clone()
-    }
-
-    pub fn num_columns(&self) -> usize {
-        self.columns.len()
-    }
-
-    pub fn num_rows(&self) -> usize {
-        self.columns[self.schema.field(0).name()].len()
-    }
-
-    pub fn column(&self, name: &str) -> Result<&ColumnarValue> {
-        Ok(&self.columns[name])
-    }
-
-    pub fn memory_size(&self) -> usize {
-        self.columns.values().map(|c| c.memory_size()).sum()
-    }
-}
-
-/// A columnar value can either be a scalar value or an Arrow array.
-#[derive(Debug, Clone)]
-pub enum ColumnarValue {
-    Scalar(ScalarValue, usize),
-    Columnar(ArrayRef),
-}
-
-impl ColumnarValue {
-    pub fn len(&self) -> usize {
-        match self {
-            ColumnarValue::Scalar(_, n) => *n,
-            ColumnarValue::Columnar(array) => array.len(),
-        }
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    pub fn data_type(&self) -> &DataType {
-        match self {
-            ColumnarValue::Columnar(array) => array.data_type(),
-            ColumnarValue::Scalar(value, _) => match value {
-                ScalarValue::UInt8(_) => &DataType::UInt8,
-                ScalarValue::UInt16(_) => &DataType::UInt16,
-                ScalarValue::UInt32(_) => &DataType::UInt32,
-                ScalarValue::UInt64(_) => &DataType::UInt64,
-                ScalarValue::Int8(_) => &DataType::Int8,
-                ScalarValue::Int16(_) => &DataType::Int16,
-                ScalarValue::Int32(_) => &DataType::Int32,
-                ScalarValue::Int64(_) => &DataType::Int64,
-                ScalarValue::Float32(_) => &DataType::Float32,
-                ScalarValue::Float64(_) => &DataType::Float64,
-                _ => unimplemented!(),
-            },
-        }
-    }
-
-    pub fn to_arrow(&self) -> Result<ArrayRef> {
-        match self {
-            ColumnarValue::Columnar(array) => Ok(array.clone()),
-            ColumnarValue::Scalar(value, n) => {
-                value.to_array_of_size(*n).map_err(|x| x.into())
-            }
-        }
-    }
-
-    pub fn memory_size(&self) -> usize {
-        match self {
-            ColumnarValue::Columnar(array) => array.get_array_memory_size(),
-            _ => 0,
-        }
-    }
-}
diff --git a/ballista/client/src/lib.rs b/ballista/client/src/lib.rs
index 125278dc..e61dfef2 100644
--- a/ballista/client/src/lib.rs
+++ b/ballista/client/src/lib.rs
@@ -17,6 +17,5 @@
 
 #![doc = include_str!("../README.md")]
 
-pub mod columnar_batch;
 pub mod context;
 pub mod prelude;
diff --git a/examples/src/bin/dataframe.rs b/examples/examples/remote-dataframe.rs
similarity index 100%
rename from examples/src/bin/dataframe.rs
rename to examples/examples/remote-dataframe.rs
diff --git a/examples/src/bin/sql.rs b/examples/examples/remote-sql.rs
similarity index 100%
rename from examples/src/bin/sql.rs
rename to examples/examples/remote-sql.rs


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(datafusion-ballista) branch main updated: Code cleanup, move examples, remove unused files (#1075)

Reply via email to