This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git


The following commit(s) were added to refs/heads/main by this push:
     new a53ce95  Arrow type bindings (#173)
a53ce95 is described below

commit a53ce958061c500d3ad2edb396aac2f9ae2943d4
Author: Jeremy Dyer <[email protected]>
AuthorDate: Mon Feb 13 17:13:57 2023 -0500

    Arrow type bindings (#173)
    
    * type bindings
    
    * checkpoint commit
    
    * First mappings of types
    
    * map_from_arrow_type
    
    * update LogicalPlan crate location
    
    * add missing apache license to src/sql/exceptions.rs
    
    * cargo fmt
    
    * clippy warnings
    
    * format!() changes
    
    * Add ARM coverage for DataType::RunEndEncoded
    
    * Return DataFusionError instead of unimplemented!()
    
    * Add github access token and address review comments
    
    * small cargo fmt fix
---
 .github/workflows/build.yml |   1 +
 .github/workflows/test.yaml |   1 +
 Cargo.lock                  |  24 +--
 src/common/data_type.rs     | 511 ++++++++++++++++++++++++++++++++++++++++++++
 src/common/df_field.rs      |  41 ++++
 src/common/mod.rs           |  19 ++
 src/context.rs              |   2 +-
 src/dataframe.rs            |   2 +-
 src/lib.rs                  |   5 +-
 src/sql.rs                  |  19 ++
 src/sql/exceptions.rs       |  42 ++++
 src/{ => sql}/logical.rs    |   0
 src/substrait.rs            |   6 +-
 13 files changed, 654 insertions(+), 19 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f457b3d..54fdcfe 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -101,6 +101,7 @@ jobs:
         uses: arduino/setup-protoc@v1
         with:
           version: '3.x'
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
       - name: Build wheels
         uses: PyO3/maturin-action@v1
         with:
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 2142ace..164b09e 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -57,6 +57,7 @@ jobs:
         uses: arduino/setup-protoc@v1
         with:
           version: '3.x'
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
 
       - name: Setup Python
         uses: actions/setup-python@v4
diff --git a/Cargo.lock b/Cargo.lock
index 6ca4adb..9f50fa3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -618,9 +618,9 @@ dependencies = [
 
 [[package]]
 name = "cxx"
-version = "1.0.89"
+version = "1.0.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc831ee6a32dd495436e317595e639a587aa9907bef96fe6e6abc290ab6204e9"
+checksum = "90d59d9acd2a682b4e40605a242f6670eaa58c5957471cbf85e8aa6a0b97a5e8"
 dependencies = [
  "cc",
  "cxxbridge-flags",
@@ -630,9 +630,9 @@ dependencies = [
 
 [[package]]
 name = "cxx-build"
-version = "1.0.89"
+version = "1.0.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94331d54f1b1a8895cd81049f7eaaaef9d05a7dcb4d1fd08bf3ff0806246789d"
+checksum = "ebfa40bda659dd5c864e65f4c9a2b0aff19bea56b017b9b77c73d3766a453a38"
 dependencies = [
  "cc",
  "codespan-reporting",
@@ -645,15 +645,15 @@ dependencies = [
 
 [[package]]
 name = "cxxbridge-flags"
-version = "1.0.89"
+version = "1.0.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48dcd35ba14ca9b40d6e4b4b39961f23d835dbb8eed74565ded361d93e1feb8a"
+checksum = "457ce6757c5c70dc6ecdbda6925b958aae7f959bda7d8fb9bde889e34a09dc03"
 
 [[package]]
 name = "cxxbridge-macro"
-version = "1.0.89"
+version = "1.0.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81bbeb29798b407ccd82a3324ade1a7286e0d29851475990b612670f6f5124d2"
+checksum = "ebf883b7aacd7b2aeb2a7b338648ee19f57c140d4ee8e52c68979c6b2f7f2263"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2515,9 +2515,9 @@ dependencies = [
 
 [[package]]
 name = "target-lexicon"
-version = "0.12.5"
+version = "0.12.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9410d0f6853b1d94f0e519fb95df60f29d2c1eff2d921ffdf01a4c8a3b54f12d"
+checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5"
 
 [[package]]
 name = "tempfile"
@@ -2662,9 +2662,9 @@ dependencies = [
 
 [[package]]
 name = "tokio-util"
-version = "0.7.6"
+version = "0.7.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc6a3b08b64e6dfad376fa2432c7b1f01522e37a623c3050bc95db2d3ff21583"
+checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2"
 dependencies = [
  "bytes",
  "futures-core",
diff --git a/src/common/data_type.rs b/src/common/data_type.rs
new file mode 100644
index 0000000..8ada1c7
--- /dev/null
+++ b/src/common/data_type.rs
@@ -0,0 +1,511 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::arrow::datatypes::DataType;
+use datafusion_common::DataFusionError;
+use pyo3::prelude::*;
+
+use crate::errors::py_datafusion_err;
+
+/// These bindings are tying together several disparate systems.
+/// You have SQL types for the SQL strings and RDBMS systems itself.
+/// Rust types for the DataFusion code
+/// Arrow types which represents the underlying arrow format
+/// Python types which represent the type in Python
+/// It is important to keep all of those types in a single
+/// and managable location. Therefore this structure exists
+/// to map those types and provide a simple place for developers
+/// to map types from one system to another.
+#[derive(Debug, Clone)]
+#[pyclass(name = "DataTypeMap", module = "datafusion", subclass)]
+pub struct DataTypeMap {
+    #[allow(dead_code)]
+    arrow_type: PyDataType,
+    #[allow(dead_code)]
+    python_type: PythonType,
+    #[allow(dead_code)]
+    sql_type: SqlType,
+}
+
+impl DataTypeMap {
+    fn new(arrow_type: DataType, python_type: PythonType, sql_type: SqlType) -> Self {
+        DataTypeMap {
+            arrow_type: PyDataType {
+                data_type: arrow_type,
+            },
+            python_type,
+            sql_type,
+        }
+    }
+
+    pub fn map_from_arrow_type(arrow_type: &DataType) -> Result<DataTypeMap, PyErr> {
+        match arrow_type {
+            DataType::Null => Ok(DataTypeMap::new(
+                DataType::Null,
+                PythonType::None,
+                SqlType::NULL,
+            )),
+            DataType::Boolean => Ok(DataTypeMap::new(
+                DataType::Boolean,
+                PythonType::Bool,
+                SqlType::BOOLEAN,
+            )),
+            DataType::Int8 => Ok(DataTypeMap::new(
+                DataType::Int8,
+                PythonType::Int,
+                SqlType::TINYINT,
+            )),
+            DataType::Int16 => Ok(DataTypeMap::new(
+                DataType::Int16,
+                PythonType::Int,
+                SqlType::SMALLINT,
+            )),
+            DataType::Int32 => Ok(DataTypeMap::new(
+                DataType::Int32,
+                PythonType::Int,
+                SqlType::INTEGER,
+            )),
+            DataType::Int64 => Ok(DataTypeMap::new(
+                DataType::Int64,
+                PythonType::Int,
+                SqlType::BIGINT,
+            )),
+            DataType::UInt8 => Ok(DataTypeMap::new(
+                DataType::UInt8,
+                PythonType::Int,
+                SqlType::TINYINT,
+            )),
+            DataType::UInt16 => Ok(DataTypeMap::new(
+                DataType::UInt16,
+                PythonType::Int,
+                SqlType::SMALLINT,
+            )),
+            DataType::UInt32 => Ok(DataTypeMap::new(
+                DataType::UInt32,
+                PythonType::Int,
+                SqlType::INTEGER,
+            )),
+            DataType::UInt64 => Ok(DataTypeMap::new(
+                DataType::UInt64,
+                PythonType::Int,
+                SqlType::BIGINT,
+            )),
+            DataType::Float16 => Ok(DataTypeMap::new(
+                DataType::Float16,
+                PythonType::Float,
+                SqlType::FLOAT,
+            )),
+            DataType::Float32 => Ok(DataTypeMap::new(
+                DataType::Float32,
+                PythonType::Float,
+                SqlType::FLOAT,
+            )),
+            DataType::Float64 => Ok(DataTypeMap::new(
+                DataType::Float64,
+                PythonType::Float,
+                SqlType::FLOAT,
+            )),
+            DataType::Timestamp(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Date32 => Ok(DataTypeMap::new(
+                DataType::Date32,
+                PythonType::Datetime,
+                SqlType::DATE,
+            )),
+            DataType::Date64 => Ok(DataTypeMap::new(
+                DataType::Date64,
+                PythonType::Datetime,
+                SqlType::DATE,
+            )),
+            DataType::Time32(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Time64(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Duration(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Interval(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Binary => Ok(DataTypeMap::new(
+                DataType::Binary,
+                PythonType::Bytes,
+                SqlType::BINARY,
+            )),
+            DataType::FixedSizeBinary(_) => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", arrow_type)),
+            )),
+            DataType::LargeBinary => Ok(DataTypeMap::new(
+                DataType::LargeBinary,
+                PythonType::Bytes,
+                SqlType::BINARY,
+            )),
+            DataType::Utf8 => Ok(DataTypeMap::new(
+                DataType::Utf8,
+                PythonType::Str,
+                SqlType::VARCHAR,
+            )),
+            DataType::LargeUtf8 => Ok(DataTypeMap::new(
+                DataType::LargeUtf8,
+                PythonType::Str,
+                SqlType::VARCHAR,
+            )),
+            DataType::List(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                arrow_type
+            )))),
+            DataType::FixedSizeList(_, _) => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", arrow_type)),
+            )),
+            DataType::LargeList(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Struct(_) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Union(_, _, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Dictionary(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Decimal128(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Decimal256(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::Map(_, _) => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", arrow_type),
+            ))),
+            DataType::RunEndEncoded(_, _) => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", arrow_type)),
+            )),
+        }
+    }
+}
+
+#[pymethods]
+impl DataTypeMap {
+    #[new]
+    pub fn py_new(arrow_type: PyDataType, python_type: PythonType, sql_type: SqlType) -> Self {
+        DataTypeMap {
+            arrow_type,
+            python_type,
+            sql_type,
+        }
+    }
+
+    #[staticmethod]
+    #[pyo3(name = "arrow")]
+    pub fn py_map_from_arrow_type(arrow_type: &PyDataType) -> PyResult<DataTypeMap> {
+        DataTypeMap::map_from_arrow_type(&arrow_type.data_type)
+    }
+
+    #[staticmethod]
+    #[pyo3(name = "sql")]
+    pub fn py_map_from_sql_type(sql_type: &SqlType) -> PyResult<DataTypeMap> {
+        match sql_type {
+            SqlType::ANY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::ARRAY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::BIGINT => Ok(DataTypeMap::new(
+                DataType::Int64,
+                PythonType::Int,
+                SqlType::BIGINT,
+            )),
+            SqlType::BINARY => Ok(DataTypeMap::new(
+                DataType::Binary,
+                PythonType::Bytes,
+                SqlType::BINARY,
+            )),
+            SqlType::BOOLEAN => Ok(DataTypeMap::new(
+                DataType::Boolean,
+                PythonType::Bool,
+                SqlType::BOOLEAN,
+            )),
+            SqlType::CHAR => Ok(DataTypeMap::new(
+                DataType::UInt8,
+                PythonType::Int,
+                SqlType::CHAR,
+            )),
+            SqlType::COLUMN_LIST => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::CURSOR => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::DATE => Ok(DataTypeMap::new(
+                DataType::Date64,
+                PythonType::Datetime,
+                SqlType::DATE,
+            )),
+            SqlType::DECIMAL => Ok(DataTypeMap::new(
+                DataType::Decimal128(1, 1),
+                PythonType::Float,
+                SqlType::DECIMAL,
+            )),
+            SqlType::DISTINCT => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::DOUBLE => Ok(DataTypeMap::new(
+                DataType::Decimal256(1, 1),
+                PythonType::Float,
+                SqlType::DOUBLE,
+            )),
+            SqlType::DYNAMIC_STAR => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::FLOAT => Ok(DataTypeMap::new(
+                DataType::Decimal128(1, 1),
+                PythonType::Float,
+                SqlType::FLOAT,
+            )),
+            SqlType::GEOMETRY => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::INTEGER => Ok(DataTypeMap::new(
+                DataType::Int8,
+                PythonType::Int,
+                SqlType::INTEGER,
+            )),
+            SqlType::INTERVAL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::INTERVAL_DAY => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::INTERVAL_DAY_HOUR => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::INTERVAL_DAY_MINUTE => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+            )),
+            SqlType::INTERVAL_DAY_SECOND => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+            )),
+            SqlType::INTERVAL_HOUR => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::INTERVAL_HOUR_MINUTE => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+            )),
+            SqlType::INTERVAL_HOUR_SECOND => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+            )),
+            SqlType::INTERVAL_MINUTE => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::INTERVAL_MINUTE_SECOND => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+            )),
+            SqlType::INTERVAL_MONTH => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::INTERVAL_SECOND => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::INTERVAL_YEAR => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::INTERVAL_YEAR_MONTH => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+            )),
+            SqlType::MAP => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::MULTISET => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::NULL => Ok(DataTypeMap::new(
+                DataType::Null,
+                PythonType::None,
+                SqlType::NULL,
+            )),
+            SqlType::OTHER => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::REAL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::ROW => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::SARG => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::SMALLINT => Ok(DataTypeMap::new(
+                DataType::Int16,
+                PythonType::Int,
+                SqlType::SMALLINT,
+            )),
+            SqlType::STRUCTURED => Err(py_datafusion_err(DataFusionError::NotImplemented(
+                format!("{:?}", sql_type),
+            ))),
+            SqlType::SYMBOL => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::TIME => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::TIME_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+            )),
+            SqlType::TIMESTAMP => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::TIMESTAMP_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err(
+                DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+            )),
+            SqlType::TINYINT => Ok(DataTypeMap::new(
+                DataType::Int8,
+                PythonType::Int,
+                SqlType::TINYINT,
+            )),
+            SqlType::UNKNOWN => Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+                "{:?}",
+                sql_type
+            )))),
+            SqlType::VARBINARY => Ok(DataTypeMap::new(
+                DataType::LargeBinary,
+                PythonType::Bytes,
+                SqlType::VARBINARY,
+            )),
+            SqlType::VARCHAR => Ok(DataTypeMap::new(
+                DataType::Utf8,
+                PythonType::Str,
+                SqlType::VARCHAR,
+            )),
+        }
+    }
+}
+
+/// PyO3 requires that objects passed between Rust and Python implement the trait `PyClass`
+/// Since `DataType` exists in another package we cannot make that happen here so we wrap
+/// `DataType` as `PyDataType` This exists solely to satisfy those constraints.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[pyclass(name = "DataType", module = "datafusion")]
+pub struct PyDataType {
+    data_type: DataType,
+}
+
+impl From<PyDataType> for DataType {
+    fn from(data_type: PyDataType) -> DataType {
+        data_type.data_type
+    }
+}
+
+impl From<DataType> for PyDataType {
+    fn from(data_type: DataType) -> PyDataType {
+        PyDataType { data_type }
+    }
+}
+
+/// Represents the possible Python types that can be mapped to the SQL types
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[pyclass(name = "PythonType", module = "datafusion")]
+pub enum PythonType {
+    Array,
+    Bool,
+    Bytes,
+    Datetime,
+    Float,
+    Int,
+    List,
+    None,
+    Object,
+    Str,
+}
+
+/// Represents the types that are possible for DataFusion to parse
+/// from a SQL query. Aka "SqlType" and are valid values for
+/// ANSI SQL
+#[allow(non_camel_case_types)]
+#[allow(clippy::upper_case_acronyms)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[pyclass(name = "SqlType", module = "datafusion")]
+pub enum SqlType {
+    ANY,
+    ARRAY,
+    BIGINT,
+    BINARY,
+    BOOLEAN,
+    CHAR,
+    COLUMN_LIST,
+    CURSOR,
+    DATE,
+    DECIMAL,
+    DISTINCT,
+    DOUBLE,
+    DYNAMIC_STAR,
+    FLOAT,
+    GEOMETRY,
+    INTEGER,
+    INTERVAL,
+    INTERVAL_DAY,
+    INTERVAL_DAY_HOUR,
+    INTERVAL_DAY_MINUTE,
+    INTERVAL_DAY_SECOND,
+    INTERVAL_HOUR,
+    INTERVAL_HOUR_MINUTE,
+    INTERVAL_HOUR_SECOND,
+    INTERVAL_MINUTE,
+    INTERVAL_MINUTE_SECOND,
+    INTERVAL_MONTH,
+    INTERVAL_SECOND,
+    INTERVAL_YEAR,
+    INTERVAL_YEAR_MONTH,
+    MAP,
+    MULTISET,
+    NULL,
+    OTHER,
+    REAL,
+    ROW,
+    SARG,
+    SMALLINT,
+    STRUCTURED,
+    SYMBOL,
+    TIME,
+    TIME_WITH_LOCAL_TIME_ZONE,
+    TIMESTAMP,
+    TIMESTAMP_WITH_LOCAL_TIME_ZONE,
+    TINYINT,
+    UNKNOWN,
+    VARBINARY,
+    VARCHAR,
+}
diff --git a/src/common/df_field.rs b/src/common/df_field.rs
new file mode 100644
index 0000000..098df9b
--- /dev/null
+++ b/src/common/df_field.rs
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::arrow::datatypes::Field;
+use pyo3::prelude::*;
+
+use crate::common::data_type::DataTypeMap;
+
+/// PyDFField wraps an arrow-datafusion `DFField` struct type
+/// and also supplies convenience methods for interacting
+/// with the `DFField` instance in the context of Python
+#[pyclass(name = "DFField", module = "datafusion", subclass)]
+#[derive(Debug, Clone)]
+pub struct PyDFField {
+    /// Optional qualifier (usually a table or relation name)
+    #[allow(dead_code)]
+    qualifier: Option<String>,
+    #[allow(dead_code)]
+    name: String,
+    #[allow(dead_code)]
+    data_type: DataTypeMap,
+    /// Arrow field definition
+    #[allow(dead_code)]
+    field: Field,
+    #[allow(dead_code)]
+    index: usize,
+}
diff --git a/src/common/mod.rs b/src/common/mod.rs
new file mode 100644
index 0000000..2d37f68
--- /dev/null
+++ b/src/common/mod.rs
@@ -0,0 +1,19 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+pub mod data_type;
+pub mod df_field;
diff --git a/src/context.rs b/src/context.rs
index 9a992b9..c50d039 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -29,7 +29,7 @@ use crate::catalog::{PyCatalog, PyTable};
 use crate::dataframe::PyDataFrame;
 use crate::dataset::Dataset;
 use crate::errors::DataFusionError;
-use crate::logical::PyLogicalPlan;
+use crate::sql::logical::PyLogicalPlan;
 use crate::store::StorageContexts;
 use crate::udaf::PyAggregateUDF;
 use crate::udf::PyScalarUDF;
diff --git a/src/dataframe.rs b/src/dataframe.rs
index de8d245..0f73757 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::logical::PyLogicalPlan;
 use crate::physical_plan::PyExecutionPlan;
+use crate::sql::logical::PyLogicalPlan;
 use crate::utils::wait_for_future;
 use crate::{errors::DataFusionError, expression::PyExpr};
 use datafusion::arrow::datatypes::Schema;
diff --git a/src/lib.rs b/src/lib.rs
index eda74ff..be699d5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,6 +21,7 @@ use pyo3::prelude::*;
 
 #[allow(clippy::borrow_deref_ref)]
 pub mod catalog;
+pub mod common;
 #[allow(clippy::borrow_deref_ref)]
 mod config;
 #[allow(clippy::borrow_deref_ref)]
@@ -34,9 +35,9 @@ pub mod errors;
 mod expression;
 #[allow(clippy::borrow_deref_ref)]
 mod functions;
-pub mod logical;
 pub mod physical_plan;
 mod pyarrow_filter_expression;
+pub mod sql;
 pub mod store;
 pub mod substrait;
 #[allow(clippy::borrow_deref_ref)]
@@ -65,7 +66,7 @@ fn _internal(py: Python, m: &PyModule) -> PyResult<()> {
     m.add_class::<udf::PyScalarUDF>()?;
     m.add_class::<udaf::PyAggregateUDF>()?;
     m.add_class::<config::PyConfig>()?;
-    m.add_class::<logical::PyLogicalPlan>()?;
+    m.add_class::<sql::logical::PyLogicalPlan>()?;
     m.add_class::<physical_plan::PyExecutionPlan>()?;
 
     // Register the functions as a submodule
diff --git a/src/sql.rs b/src/sql.rs
new file mode 100644
index 0000000..9f1fe81
--- /dev/null
+++ b/src/sql.rs
@@ -0,0 +1,19 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+pub mod exceptions;
+pub mod logical;
diff --git a/src/sql/exceptions.rs b/src/sql/exceptions.rs
new file mode 100644
index 0000000..c458402
--- /dev/null
+++ b/src/sql/exceptions.rs
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::fmt::{Debug, Display};
+
+use pyo3::{create_exception, PyErr};
+
+// Identifies exceptions that occur while attempting to generate a `LogicalPlan` from a SQL string
+create_exception!(rust, ParsingException, pyo3::exceptions::PyException);
+
+// Identifies exceptions that occur during attempts to optimization an existing `LogicalPlan`
+create_exception!(rust, OptimizationException, pyo3::exceptions::PyException);
+
+pub fn py_type_err(e: impl Debug + Display) -> PyErr {
+    PyErr::new::<pyo3::exceptions::PyTypeError, _>(format!("{e}"))
+}
+
+pub fn py_runtime_err(e: impl Debug + Display) -> PyErr {
+    PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("{e}"))
+}
+
+pub fn py_parsing_exp(e: impl Debug + Display) -> PyErr {
+    PyErr::new::<ParsingException, _>(format!("{e}"))
+}
+
+pub fn py_optimization_exp(e: impl Debug + Display) -> PyErr {
+    PyErr::new::<OptimizationException, _>(format!("{e}"))
+}
diff --git a/src/logical.rs b/src/sql/logical.rs
similarity index 100%
rename from src/logical.rs
rename to src/sql/logical.rs
diff --git a/src/substrait.rs b/src/substrait.rs
index b9e5c92..2bde011 100644
--- a/src/substrait.rs
+++ b/src/substrait.rs
@@ -18,9 +18,9 @@
 use pyo3::prelude::*;
 
 use crate::context::PySessionContext;
-use crate::errors::py_datafusion_err;
-use crate::errors::DataFusionError;
-use crate::{logical::PyLogicalPlan, utils::wait_for_future};
+use crate::errors::{py_datafusion_err, DataFusionError};
+use crate::sql::logical::PyLogicalPlan;
+use crate::utils::wait_for_future;
 
 use datafusion_substrait::logical_plan::{consumer, producer};
 use datafusion_substrait::serializer;

Reply via email to