This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/main by this push:
new a53ce95 Arrow type bindings (#173)
a53ce95 is described below
commit a53ce958061c500d3ad2edb396aac2f9ae2943d4
Author: Jeremy Dyer <[email protected]>
AuthorDate: Mon Feb 13 17:13:57 2023 -0500
Arrow type bindings (#173)
* type bindings
* checkpoint commit
* First mappings of types
* map_from_arrow_type
* update LogicalPlan crate location
* add missing apache license to src/sql/exceptions.rs
* cargo fmt
* clippy warnings
* format!() changes
* Add ARM coverage for DataType::RunEndEncoded
* Return DataFusionError instead of unimplemented!()
* Add github access token and address review comments
* small cargo fmt fix
---
.github/workflows/build.yml | 1 +
.github/workflows/test.yaml | 1 +
Cargo.lock | 24 +--
src/common/data_type.rs | 511 ++++++++++++++++++++++++++++++++++++++++++++
src/common/df_field.rs | 41 ++++
src/common/mod.rs | 19 ++
src/context.rs | 2 +-
src/dataframe.rs | 2 +-
src/lib.rs | 5 +-
src/sql.rs | 19 ++
src/sql/exceptions.rs | 42 ++++
src/{ => sql}/logical.rs | 0
src/substrait.rs | 6 +-
13 files changed, 654 insertions(+), 19 deletions(-)
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f457b3d..54fdcfe 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -101,6 +101,7 @@ jobs:
uses: arduino/setup-protoc@v1
with:
version: '3.x'
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 2142ace..164b09e 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -57,6 +57,7 @@ jobs:
uses: arduino/setup-protoc@v1
with:
version: '3.x'
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
- name: Setup Python
uses: actions/setup-python@v4
diff --git a/Cargo.lock b/Cargo.lock
index 6ca4adb..9f50fa3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -618,9 +618,9 @@ dependencies = [
[[package]]
name = "cxx"
-version = "1.0.89"
+version = "1.0.90"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc831ee6a32dd495436e317595e639a587aa9907bef96fe6e6abc290ab6204e9"
+checksum = "90d59d9acd2a682b4e40605a242f6670eaa58c5957471cbf85e8aa6a0b97a5e8"
dependencies = [
"cc",
"cxxbridge-flags",
@@ -630,9 +630,9 @@ dependencies = [
[[package]]
name = "cxx-build"
-version = "1.0.89"
+version = "1.0.90"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "94331d54f1b1a8895cd81049f7eaaaef9d05a7dcb4d1fd08bf3ff0806246789d"
+checksum = "ebfa40bda659dd5c864e65f4c9a2b0aff19bea56b017b9b77c73d3766a453a38"
dependencies = [
"cc",
"codespan-reporting",
@@ -645,15 +645,15 @@ dependencies = [
[[package]]
name = "cxxbridge-flags"
-version = "1.0.89"
+version = "1.0.90"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48dcd35ba14ca9b40d6e4b4b39961f23d835dbb8eed74565ded361d93e1feb8a"
+checksum = "457ce6757c5c70dc6ecdbda6925b958aae7f959bda7d8fb9bde889e34a09dc03"
[[package]]
name = "cxxbridge-macro"
-version = "1.0.89"
+version = "1.0.90"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81bbeb29798b407ccd82a3324ade1a7286e0d29851475990b612670f6f5124d2"
+checksum = "ebf883b7aacd7b2aeb2a7b338648ee19f57c140d4ee8e52c68979c6b2f7f2263"
dependencies = [
"proc-macro2",
"quote",
@@ -2515,9 +2515,9 @@ dependencies = [
[[package]]
name = "target-lexicon"
-version = "0.12.5"
+version = "0.12.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9410d0f6853b1d94f0e519fb95df60f29d2c1eff2d921ffdf01a4c8a3b54f12d"
+checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5"
[[package]]
name = "tempfile"
@@ -2662,9 +2662,9 @@ dependencies = [
[[package]]
name = "tokio-util"
-version = "0.7.6"
+version = "0.7.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc6a3b08b64e6dfad376fa2432c7b1f01522e37a623c3050bc95db2d3ff21583"
+checksum = "5427d89453009325de0d8f342c9490009f76e999cb7672d77e46267448f7e6b2"
dependencies = [
"bytes",
"futures-core",
diff --git a/src/common/data_type.rs b/src/common/data_type.rs
new file mode 100644
index 0000000..8ada1c7
--- /dev/null
+++ b/src/common/data_type.rs
@@ -0,0 +1,511 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::arrow::datatypes::DataType;
+use datafusion_common::DataFusionError;
+use pyo3::prelude::*;
+
+use crate::errors::py_datafusion_err;
+
+/// These bindings are tying together several disparate systems.
+/// You have SQL types for the SQL strings and the RDBMS system itself.
+/// Rust types for the DataFusion code
+/// Arrow types which represents the underlying arrow format
+/// Python types which represent the type in Python
+/// It is important to keep all of those types in a single
+/// and manageable location. Therefore this structure exists
+/// to map those types and provide a simple place for developers
+/// to map types from one system to another.
+#[derive(Debug, Clone)]
+#[pyclass(name = "DataTypeMap", module = "datafusion", subclass)]
+pub struct DataTypeMap {
+ #[allow(dead_code)]
+ arrow_type: PyDataType,
+ #[allow(dead_code)]
+ python_type: PythonType,
+ #[allow(dead_code)]
+ sql_type: SqlType,
+}
+
+impl DataTypeMap {
+ fn new(arrow_type: DataType, python_type: PythonType, sql_type: SqlType)
-> Self {
+ DataTypeMap {
+ arrow_type: PyDataType {
+ data_type: arrow_type,
+ },
+ python_type,
+ sql_type,
+ }
+ }
+
+ pub fn map_from_arrow_type(arrow_type: &DataType) -> Result<DataTypeMap,
PyErr> {
+ match arrow_type {
+ DataType::Null => Ok(DataTypeMap::new(
+ DataType::Null,
+ PythonType::None,
+ SqlType::NULL,
+ )),
+ DataType::Boolean => Ok(DataTypeMap::new(
+ DataType::Boolean,
+ PythonType::Bool,
+ SqlType::BOOLEAN,
+ )),
+ DataType::Int8 => Ok(DataTypeMap::new(
+ DataType::Int8,
+ PythonType::Int,
+ SqlType::TINYINT,
+ )),
+ DataType::Int16 => Ok(DataTypeMap::new(
+ DataType::Int16,
+ PythonType::Int,
+ SqlType::SMALLINT,
+ )),
+ DataType::Int32 => Ok(DataTypeMap::new(
+ DataType::Int32,
+ PythonType::Int,
+ SqlType::INTEGER,
+ )),
+ DataType::Int64 => Ok(DataTypeMap::new(
+ DataType::Int64,
+ PythonType::Int,
+ SqlType::BIGINT,
+ )),
+ DataType::UInt8 => Ok(DataTypeMap::new(
+ DataType::UInt8,
+ PythonType::Int,
+ SqlType::TINYINT,
+ )),
+ DataType::UInt16 => Ok(DataTypeMap::new(
+ DataType::UInt16,
+ PythonType::Int,
+ SqlType::SMALLINT,
+ )),
+ DataType::UInt32 => Ok(DataTypeMap::new(
+ DataType::UInt32,
+ PythonType::Int,
+ SqlType::INTEGER,
+ )),
+ DataType::UInt64 => Ok(DataTypeMap::new(
+ DataType::UInt64,
+ PythonType::Int,
+ SqlType::BIGINT,
+ )),
+ DataType::Float16 => Ok(DataTypeMap::new(
+ DataType::Float16,
+ PythonType::Float,
+ SqlType::FLOAT,
+ )),
+ DataType::Float32 => Ok(DataTypeMap::new(
+ DataType::Float32,
+ PythonType::Float,
+ SqlType::FLOAT,
+ )),
+ DataType::Float64 => Ok(DataTypeMap::new(
+ DataType::Float64,
+ PythonType::Float,
+ SqlType::FLOAT,
+ )),
+ DataType::Timestamp(_, _) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Date32 => Ok(DataTypeMap::new(
+ DataType::Date32,
+ PythonType::Datetime,
+ SqlType::DATE,
+ )),
+ DataType::Date64 => Ok(DataTypeMap::new(
+ DataType::Date64,
+ PythonType::Datetime,
+ SqlType::DATE,
+ )),
+ DataType::Time32(_) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Time64(_) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Duration(_) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Interval(_) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Binary => Ok(DataTypeMap::new(
+ DataType::Binary,
+ PythonType::Bytes,
+ SqlType::BINARY,
+ )),
+ DataType::FixedSizeBinary(_) => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", arrow_type)),
+ )),
+ DataType::LargeBinary => Ok(DataTypeMap::new(
+ DataType::LargeBinary,
+ PythonType::Bytes,
+ SqlType::BINARY,
+ )),
+ DataType::Utf8 => Ok(DataTypeMap::new(
+ DataType::Utf8,
+ PythonType::Str,
+ SqlType::VARCHAR,
+ )),
+ DataType::LargeUtf8 => Ok(DataTypeMap::new(
+ DataType::LargeUtf8,
+ PythonType::Str,
+ SqlType::VARCHAR,
+ )),
+ DataType::List(_) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ arrow_type
+ )))),
+ DataType::FixedSizeList(_, _) => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", arrow_type)),
+ )),
+ DataType::LargeList(_) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Struct(_) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Union(_, _, _) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Dictionary(_, _) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Decimal128(_, _) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Decimal256(_, _) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::Map(_, _) =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", arrow_type),
+ ))),
+ DataType::RunEndEncoded(_, _) => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", arrow_type)),
+ )),
+ }
+ }
+}
+
+#[pymethods]
+impl DataTypeMap {
+ #[new]
+ pub fn py_new(arrow_type: PyDataType, python_type: PythonType, sql_type:
SqlType) -> Self {
+ DataTypeMap {
+ arrow_type,
+ python_type,
+ sql_type,
+ }
+ }
+
+ #[staticmethod]
+ #[pyo3(name = "arrow")]
+ pub fn py_map_from_arrow_type(arrow_type: &PyDataType) ->
PyResult<DataTypeMap> {
+ DataTypeMap::map_from_arrow_type(&arrow_type.data_type)
+ }
+
+ #[staticmethod]
+ #[pyo3(name = "sql")]
+ pub fn py_map_from_sql_type(sql_type: &SqlType) -> PyResult<DataTypeMap> {
+ match sql_type {
+ SqlType::ANY =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::ARRAY =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::BIGINT => Ok(DataTypeMap::new(
+ DataType::Int64,
+ PythonType::Int,
+ SqlType::BIGINT,
+ )),
+ SqlType::BINARY => Ok(DataTypeMap::new(
+ DataType::Binary,
+ PythonType::Bytes,
+ SqlType::BINARY,
+ )),
+ SqlType::BOOLEAN => Ok(DataTypeMap::new(
+ DataType::Boolean,
+ PythonType::Bool,
+ SqlType::BOOLEAN,
+ )),
+ SqlType::CHAR => Ok(DataTypeMap::new(
+ DataType::UInt8,
+ PythonType::Int,
+ SqlType::CHAR,
+ )),
+ SqlType::COLUMN_LIST =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::CURSOR =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::DATE => Ok(DataTypeMap::new(
+ DataType::Date64,
+ PythonType::Datetime,
+ SqlType::DATE,
+ )),
+ SqlType::DECIMAL => Ok(DataTypeMap::new(
+ DataType::Decimal128(1, 1),
+ PythonType::Float,
+ SqlType::DECIMAL,
+ )),
+ SqlType::DISTINCT =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::DOUBLE => Ok(DataTypeMap::new(
+ DataType::Decimal256(1, 1),
+ PythonType::Float,
+ SqlType::DOUBLE,
+ )),
+ SqlType::DYNAMIC_STAR =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::FLOAT => Ok(DataTypeMap::new(
+ DataType::Decimal128(1, 1),
+ PythonType::Float,
+ SqlType::FLOAT,
+ )),
+ SqlType::GEOMETRY =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::INTEGER => Ok(DataTypeMap::new(
+ DataType::Int8,
+ PythonType::Int,
+ SqlType::INTEGER,
+ )),
+ SqlType::INTERVAL =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::INTERVAL_DAY =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::INTERVAL_DAY_HOUR =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::INTERVAL_DAY_MINUTE => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+ )),
+ SqlType::INTERVAL_DAY_SECOND => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+ )),
+ SqlType::INTERVAL_HOUR =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::INTERVAL_HOUR_MINUTE => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+ )),
+ SqlType::INTERVAL_HOUR_SECOND => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+ )),
+ SqlType::INTERVAL_MINUTE =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::INTERVAL_MINUTE_SECOND => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+ )),
+ SqlType::INTERVAL_MONTH =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::INTERVAL_SECOND =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::INTERVAL_YEAR =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::INTERVAL_YEAR_MONTH => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+ )),
+ SqlType::MAP =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::MULTISET =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::NULL => Ok(DataTypeMap::new(
+ DataType::Null,
+ PythonType::None,
+ SqlType::NULL,
+ )),
+ SqlType::OTHER =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::REAL =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::ROW =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::SARG =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::SMALLINT => Ok(DataTypeMap::new(
+ DataType::Int16,
+ PythonType::Int,
+ SqlType::SMALLINT,
+ )),
+ SqlType::STRUCTURED =>
Err(py_datafusion_err(DataFusionError::NotImplemented(
+ format!("{:?}", sql_type),
+ ))),
+ SqlType::SYMBOL =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::TIME =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::TIME_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+ )),
+ SqlType::TIMESTAMP =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::TIMESTAMP_WITH_LOCAL_TIME_ZONE => Err(py_datafusion_err(
+ DataFusionError::NotImplemented(format!("{:?}", sql_type)),
+ )),
+ SqlType::TINYINT => Ok(DataTypeMap::new(
+ DataType::Int8,
+ PythonType::Int,
+ SqlType::TINYINT,
+ )),
+ SqlType::UNKNOWN =>
Err(py_datafusion_err(DataFusionError::NotImplemented(format!(
+ "{:?}",
+ sql_type
+ )))),
+ SqlType::VARBINARY => Ok(DataTypeMap::new(
+ DataType::LargeBinary,
+ PythonType::Bytes,
+ SqlType::VARBINARY,
+ )),
+ SqlType::VARCHAR => Ok(DataTypeMap::new(
+ DataType::Utf8,
+ PythonType::Str,
+ SqlType::VARCHAR,
+ )),
+ }
+ }
+}
+
+/// PyO3 requires that objects passed between Rust and Python implement the
trait `PyClass`
+/// Since `DataType` exists in another package we cannot make that happen here
so we wrap
+/// `DataType` as `PyDataType`. This exists solely to satisfy those constraints.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[pyclass(name = "DataType", module = "datafusion")]
+pub struct PyDataType {
+ data_type: DataType,
+}
+
+impl From<PyDataType> for DataType {
+ fn from(data_type: PyDataType) -> DataType {
+ data_type.data_type
+ }
+}
+
+impl From<DataType> for PyDataType {
+ fn from(data_type: DataType) -> PyDataType {
+ PyDataType { data_type }
+ }
+}
+
+/// Represents the possible Python types that can be mapped to the SQL types
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[pyclass(name = "PythonType", module = "datafusion")]
+pub enum PythonType {
+ Array,
+ Bool,
+ Bytes,
+ Datetime,
+ Float,
+ Int,
+ List,
+ None,
+ Object,
+ Str,
+}
+
+/// Represents the types that are possible for DataFusion to parse
+/// from a SQL query. Aka "SqlType" and are valid values for
+/// ANSI SQL
+#[allow(non_camel_case_types)]
+#[allow(clippy::upper_case_acronyms)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[pyclass(name = "SqlType", module = "datafusion")]
+pub enum SqlType {
+ ANY,
+ ARRAY,
+ BIGINT,
+ BINARY,
+ BOOLEAN,
+ CHAR,
+ COLUMN_LIST,
+ CURSOR,
+ DATE,
+ DECIMAL,
+ DISTINCT,
+ DOUBLE,
+ DYNAMIC_STAR,
+ FLOAT,
+ GEOMETRY,
+ INTEGER,
+ INTERVAL,
+ INTERVAL_DAY,
+ INTERVAL_DAY_HOUR,
+ INTERVAL_DAY_MINUTE,
+ INTERVAL_DAY_SECOND,
+ INTERVAL_HOUR,
+ INTERVAL_HOUR_MINUTE,
+ INTERVAL_HOUR_SECOND,
+ INTERVAL_MINUTE,
+ INTERVAL_MINUTE_SECOND,
+ INTERVAL_MONTH,
+ INTERVAL_SECOND,
+ INTERVAL_YEAR,
+ INTERVAL_YEAR_MONTH,
+ MAP,
+ MULTISET,
+ NULL,
+ OTHER,
+ REAL,
+ ROW,
+ SARG,
+ SMALLINT,
+ STRUCTURED,
+ SYMBOL,
+ TIME,
+ TIME_WITH_LOCAL_TIME_ZONE,
+ TIMESTAMP,
+ TIMESTAMP_WITH_LOCAL_TIME_ZONE,
+ TINYINT,
+ UNKNOWN,
+ VARBINARY,
+ VARCHAR,
+}
diff --git a/src/common/df_field.rs b/src/common/df_field.rs
new file mode 100644
index 0000000..098df9b
--- /dev/null
+++ b/src/common/df_field.rs
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use datafusion::arrow::datatypes::Field;
+use pyo3::prelude::*;
+
+use crate::common::data_type::DataTypeMap;
+
+/// PyDFField wraps an arrow-datafusion `DFField` struct type
+/// and also supplies convenience methods for interacting
+/// with the `DFField` instance in the context of Python
+#[pyclass(name = "DFField", module = "datafusion", subclass)]
+#[derive(Debug, Clone)]
+pub struct PyDFField {
+ /// Optional qualifier (usually a table or relation name)
+ #[allow(dead_code)]
+ qualifier: Option<String>,
+ #[allow(dead_code)]
+ name: String,
+ #[allow(dead_code)]
+ data_type: DataTypeMap,
+ /// Arrow field definition
+ #[allow(dead_code)]
+ field: Field,
+ #[allow(dead_code)]
+ index: usize,
+}
diff --git a/src/common/mod.rs b/src/common/mod.rs
new file mode 100644
index 0000000..2d37f68
--- /dev/null
+++ b/src/common/mod.rs
@@ -0,0 +1,19 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+pub mod data_type;
+pub mod df_field;
diff --git a/src/context.rs b/src/context.rs
index 9a992b9..c50d039 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -29,7 +29,7 @@ use crate::catalog::{PyCatalog, PyTable};
use crate::dataframe::PyDataFrame;
use crate::dataset::Dataset;
use crate::errors::DataFusionError;
-use crate::logical::PyLogicalPlan;
+use crate::sql::logical::PyLogicalPlan;
use crate::store::StorageContexts;
use crate::udaf::PyAggregateUDF;
use crate::udf::PyScalarUDF;
diff --git a/src/dataframe.rs b/src/dataframe.rs
index de8d245..0f73757 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -15,8 +15,8 @@
// specific language governing permissions and limitations
// under the License.
-use crate::logical::PyLogicalPlan;
use crate::physical_plan::PyExecutionPlan;
+use crate::sql::logical::PyLogicalPlan;
use crate::utils::wait_for_future;
use crate::{errors::DataFusionError, expression::PyExpr};
use datafusion::arrow::datatypes::Schema;
diff --git a/src/lib.rs b/src/lib.rs
index eda74ff..be699d5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,6 +21,7 @@ use pyo3::prelude::*;
#[allow(clippy::borrow_deref_ref)]
pub mod catalog;
+pub mod common;
#[allow(clippy::borrow_deref_ref)]
mod config;
#[allow(clippy::borrow_deref_ref)]
@@ -34,9 +35,9 @@ pub mod errors;
mod expression;
#[allow(clippy::borrow_deref_ref)]
mod functions;
-pub mod logical;
pub mod physical_plan;
mod pyarrow_filter_expression;
+pub mod sql;
pub mod store;
pub mod substrait;
#[allow(clippy::borrow_deref_ref)]
@@ -65,7 +66,7 @@ fn _internal(py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<udf::PyScalarUDF>()?;
m.add_class::<udaf::PyAggregateUDF>()?;
m.add_class::<config::PyConfig>()?;
- m.add_class::<logical::PyLogicalPlan>()?;
+ m.add_class::<sql::logical::PyLogicalPlan>()?;
m.add_class::<physical_plan::PyExecutionPlan>()?;
// Register the functions as a submodule
diff --git a/src/sql.rs b/src/sql.rs
new file mode 100644
index 0000000..9f1fe81
--- /dev/null
+++ b/src/sql.rs
@@ -0,0 +1,19 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+pub mod exceptions;
+pub mod logical;
diff --git a/src/sql/exceptions.rs b/src/sql/exceptions.rs
new file mode 100644
index 0000000..c458402
--- /dev/null
+++ b/src/sql/exceptions.rs
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::fmt::{Debug, Display};
+
+use pyo3::{create_exception, PyErr};
+
+// Identifies exceptions that occur while attempting to generate a
`LogicalPlan` from a SQL string
+create_exception!(rust, ParsingException, pyo3::exceptions::PyException);
+
+// Identifies exceptions that occur during attempts to optimize an
existing `LogicalPlan`
+create_exception!(rust, OptimizationException, pyo3::exceptions::PyException);
+
+pub fn py_type_err(e: impl Debug + Display) -> PyErr {
+ PyErr::new::<pyo3::exceptions::PyTypeError, _>(format!("{e}"))
+}
+
+pub fn py_runtime_err(e: impl Debug + Display) -> PyErr {
+ PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("{e}"))
+}
+
+pub fn py_parsing_exp(e: impl Debug + Display) -> PyErr {
+ PyErr::new::<ParsingException, _>(format!("{e}"))
+}
+
+pub fn py_optimization_exp(e: impl Debug + Display) -> PyErr {
+ PyErr::new::<OptimizationException, _>(format!("{e}"))
+}
diff --git a/src/logical.rs b/src/sql/logical.rs
similarity index 100%
rename from src/logical.rs
rename to src/sql/logical.rs
diff --git a/src/substrait.rs b/src/substrait.rs
index b9e5c92..2bde011 100644
--- a/src/substrait.rs
+++ b/src/substrait.rs
@@ -18,9 +18,9 @@
use pyo3::prelude::*;
use crate::context::PySessionContext;
-use crate::errors::py_datafusion_err;
-use crate::errors::DataFusionError;
-use crate::{logical::PyLogicalPlan, utils::wait_for_future};
+use crate::errors::{py_datafusion_err, DataFusionError};
+use crate::sql::logical::PyLogicalPlan;
+use crate::utils::wait_for_future;
use datafusion_substrait::logical_plan::{consumer, producer};
use datafusion_substrait::serializer;