Aitozi commented on code in PR #18: URL: https://github.com/apache/paimon-rust/pull/18#discussion_r1686437950
########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,956 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use bitflags::bitflags; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +bitflags! { +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] + pub struct DataTypeFamily: u32 { + const PREDEFINED = 1 << 0; + const CONSTRUCTED = 1 << 1; + const CHARACTER_STRING = 1 << 2; + const BINARY_STRING = 1 << 3; + const NUMERIC = 1 << 4; + const INTEGER_NUMERIC = 1 << 5; + const EXACT_NUMERIC = 1 << 6; + const APPROXIMATE_NUMERIC = 1 << 7; + const DATETIME = 1 << 8; + const TIME = 1 << 9; + const TIMESTAMP = 1 << 10; + const COLLECTION = 1 << 11; + const EXTENSION = 1 << 12; + } +} + +/// The root of data type. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub enum DataTypeRoot { + Char, + Varchar, + Boolean, + Binary, + Varbinary, + Decimal, + Tinyint, + Smallint, + Integer, + Bigint, + Float, + Double, + Date, + TimeWithoutTimeZone, + TimestampWithoutTimeZone, + TimestampWithLocalTimeZone, + Array, + Multiset, + Map, + Row, +} + +impl DataTypeRoot { + pub fn families(&self) -> DataTypeFamily { + match self { + Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Boolean => DataTypeFamily::PREDEFINED, + Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Decimal => { + DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC + } + Self::Tinyint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Smallint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Integer => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Bigint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Float => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Double => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Date => DataTypeFamily::PREDEFINED | 
DataTypeFamily::DATETIME, + Self::TimeWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIME + } + Self::TimestampWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIMESTAMP + } + Self::TimestampWithLocalTimeZone => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::DATETIME + | DataTypeFamily::TIMESTAMP + | DataTypeFamily::EXTENSION + } + Self::Array => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Multiset => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Map => DataTypeFamily::CONSTRUCTED | DataTypeFamily::EXTENSION, + Self::Row => DataTypeFamily::CONSTRUCTED, + } + } +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&mut self, data_type: &DataType) -> R; +} + +/// Data type for paimon table. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +impl Display for DataType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_sql_string()) + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result<Self, Self::Err> { + todo!() + } +} + +#[allow(dead_code)] +impl DataType { + fn new(is_nullable: bool, type_root: DataTypeRoot) -> Self { + Self { + is_nullable, + type_root, + } + } + + /// Returns true if the data type is nullable. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L59> + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /// Returns the root of the data type. 
+ /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L66> + fn get_type_root(&self) -> &DataTypeRoot { + &self.type_root + } + + /// Returns the family of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L186> + fn is(&self, type_root: &DataTypeRoot) -> bool { + &self.type_root == type_root + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L195> + fn is_with_family(&self, family: DataTypeFamily) -> bool { + self.type_root.families().contains(family) + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L204> + fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { + type_roots.iter().any(|tr: &DataTypeRoot| self.is(tr)) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L214> + fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { + families + .iter() + .any(|f: &DataTypeFamily| self.is_with_family(f.clone())) + } + + /// Returns true if the data type is with the family. 
+ /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L224> + fn copy(&self, is_nullable: bool) -> Self { + Self { + is_nullable, + type_root: self.type_root.clone(), + } + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L231> + fn copy_with_nullable(&self) -> Self { + self.copy(self.is_nullable) + } + + /// Returns true if the data type is with the family. Review Comment: ditto ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,956 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use bitflags::bitflags; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +bitflags! { +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] + pub struct DataTypeFamily: u32 { + const PREDEFINED = 1 << 0; + const CONSTRUCTED = 1 << 1; + const CHARACTER_STRING = 1 << 2; + const BINARY_STRING = 1 << 3; + const NUMERIC = 1 << 4; + const INTEGER_NUMERIC = 1 << 5; + const EXACT_NUMERIC = 1 << 6; + const APPROXIMATE_NUMERIC = 1 << 7; + const DATETIME = 1 << 8; + const TIME = 1 << 9; + const TIMESTAMP = 1 << 10; + const COLLECTION = 1 << 11; + const EXTENSION = 1 << 12; + } +} + +/// The root of data type. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub enum DataTypeRoot { + Char, + Varchar, + Boolean, + Binary, + Varbinary, + Decimal, + Tinyint, + Smallint, + Integer, + Bigint, + Float, + Double, + Date, + TimeWithoutTimeZone, + TimestampWithoutTimeZone, + TimestampWithLocalTimeZone, + Array, + Multiset, + Map, + Row, +} + +impl DataTypeRoot { + pub fn families(&self) -> DataTypeFamily { + match self { + Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Boolean => DataTypeFamily::PREDEFINED, + Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Decimal => { + DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC + } + Self::Tinyint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Smallint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | 
DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Integer => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Bigint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Float => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Double => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Date => DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME, + Self::TimeWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIME + } + Self::TimestampWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIMESTAMP + } + Self::TimestampWithLocalTimeZone => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::DATETIME + | DataTypeFamily::TIMESTAMP + | DataTypeFamily::EXTENSION + } + Self::Array => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Multiset => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Map => DataTypeFamily::CONSTRUCTED | DataTypeFamily::EXTENSION, + Self::Row => DataTypeFamily::CONSTRUCTED, + } + } +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&mut self, data_type: &DataType) -> R; +} + +/// Data type for paimon table. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +impl Display for DataType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_sql_string()) + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result<Self, Self::Err> { + todo!() + } +} + +#[allow(dead_code)] +impl DataType { + fn new(is_nullable: bool, type_root: DataTypeRoot) -> Self { + Self { + is_nullable, + type_root, + } + } + + /// Returns true if the data type is nullable. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L59> + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /// Returns the root of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L66> + fn get_type_root(&self) -> &DataTypeRoot { + &self.type_root + } + + /// Returns the family of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L186> + fn is(&self, type_root: &DataTypeRoot) -> bool { + &self.type_root == type_root + } + + /// Returns true if the data type is with the family. 
+ /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L195> + fn is_with_family(&self, family: DataTypeFamily) -> bool { + self.type_root.families().contains(family) + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L204> + fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { + type_roots.iter().any(|tr: &DataTypeRoot| self.is(tr)) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L214> Review Comment: The line number does not seem correct. ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,956 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use bitflags::bitflags; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +bitflags! 
{ +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] + pub struct DataTypeFamily: u32 { + const PREDEFINED = 1 << 0; + const CONSTRUCTED = 1 << 1; + const CHARACTER_STRING = 1 << 2; + const BINARY_STRING = 1 << 3; + const NUMERIC = 1 << 4; + const INTEGER_NUMERIC = 1 << 5; + const EXACT_NUMERIC = 1 << 6; + const APPROXIMATE_NUMERIC = 1 << 7; + const DATETIME = 1 << 8; + const TIME = 1 << 9; + const TIMESTAMP = 1 << 10; + const COLLECTION = 1 << 11; + const EXTENSION = 1 << 12; + } +} + +/// The root of data type. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub enum DataTypeRoot { + Char, + Varchar, + Boolean, + Binary, + Varbinary, + Decimal, + Tinyint, + Smallint, + Integer, + Bigint, + Float, + Double, + Date, + TimeWithoutTimeZone, + TimestampWithoutTimeZone, + TimestampWithLocalTimeZone, + Array, + Multiset, + Map, + Row, +} + +impl DataTypeRoot { + pub fn families(&self) -> DataTypeFamily { + match self { + Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Boolean => DataTypeFamily::PREDEFINED, + Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Decimal => { + DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC + } + Self::Tinyint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + 
Self::Smallint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Integer => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Bigint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Float => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Double => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Date => DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME, + Self::TimeWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIME + } + Self::TimestampWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIMESTAMP + } + Self::TimestampWithLocalTimeZone => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::DATETIME + | DataTypeFamily::TIMESTAMP + | DataTypeFamily::EXTENSION + } + Self::Array => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Multiset => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Map => DataTypeFamily::CONSTRUCTED | DataTypeFamily::EXTENSION, + Self::Row => DataTypeFamily::CONSTRUCTED, + } + } +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&mut self, data_type: &DataType) -> R; +} + +/// Data type for paimon table. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +impl Display for DataType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_sql_string()) + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result<Self, Self::Err> { + todo!() + } +} + +#[allow(dead_code)] +impl DataType { + fn new(is_nullable: bool, type_root: DataTypeRoot) -> Self { + Self { + is_nullable, + type_root, + } + } + + /// Returns true if the data type is nullable. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L59> + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /// Returns the root of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L66> + fn get_type_root(&self) -> &DataTypeRoot { + &self.type_root + } + + /// Returns the family of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L186> + fn is(&self, type_root: &DataTypeRoot) -> bool { + &self.type_root == type_root + } + + /// Returns true if the data type is with the family. 
+ /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L195> + fn is_with_family(&self, family: DataTypeFamily) -> bool { + self.type_root.families().contains(family) + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L204> + fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { + type_roots.iter().any(|tr: &DataTypeRoot| self.is(tr)) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L214> + fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { + families + .iter() + .any(|f: &DataTypeFamily| self.is_with_family(f.clone())) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L224> + fn copy(&self, is_nullable: bool) -> Self { + Self { + is_nullable, + type_root: self.type_root.clone(), + } + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L231> + fn copy_with_nullable(&self) -> Self { + self.copy(self.is_nullable) + } + + /// Returns true if the data type is with the family. 
+ /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L240> + fn copy_ignore_nullable(&self) -> Self { + self.copy(false) + } + + fn as_sql_string(&self) -> String { + match self.type_root { + DataTypeRoot::Char => "CHAR".to_string(), + DataTypeRoot::Varchar => "VARCHAR".to_string(), + DataTypeRoot::Boolean => "BOOLEAN".to_string(), + DataTypeRoot::Binary => "BINARY".to_string(), + DataTypeRoot::Varbinary => "VARBINARY".to_string(), + DataTypeRoot::Decimal => "DECIMAL".to_string(), + DataTypeRoot::Tinyint => "TINYINT".to_string(), + DataTypeRoot::Smallint => "SMALLINT".to_string(), + DataTypeRoot::Integer => "INTEGER".to_string(), + DataTypeRoot::Bigint => "BIGINT".to_string(), + DataTypeRoot::Float => "FLOAT".to_string(), + DataTypeRoot::Double => "DOUBLE".to_string(), + DataTypeRoot::Date => "DATE".to_string(), + DataTypeRoot::TimeWithoutTimeZone => "TIME".to_string(), + DataTypeRoot::TimestampWithoutTimeZone => "TIMESTAMP".to_string(), + DataTypeRoot::TimestampWithLocalTimeZone => { + "TIMESTAMP WITH LOCAL TIME ZONE".to_string() + } + DataTypeRoot::Array => "ARRAY".to_string(), + DataTypeRoot::Multiset => "MULTISET".to_string(), + DataTypeRoot::Map => "MAP".to_string(), + DataTypeRoot::Row => "ROW".to_string(), + } + } + + fn serialize_json(&self) -> String { + serde_json::to_string(self).unwrap() + } + + fn with_nullability(&self, _format: &str, params: &[&str]) -> String { + if !self.is_nullable() { + format!("{}{} NOT NULL", _format, params.concat()) + } else { + format!("{}{}", _format, params.concat()) + } + } + + fn accept<T>(&self, visitor: &mut T) + where + T: DataTypeVisitor<T>, + { + visitor.visit(self); + } + + fn not_null(&self) -> Self { + self.copy(false) + } + + fn nullable(&self) -> Self { + self.copy(true) + } +} + +/// ArrayType for paimon. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/ArrayType.java>. +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +#[serde(rename_all = "camelCase")] +pub struct ArrayType { + pub element_type: DataType, +} + +impl ArrayType { + pub fn new(is_nullable: bool) -> Self { + Self { + element_type: DataType::new(is_nullable, DataTypeRoot::Array), + } + } + + pub fn default_value() -> Self { + Self::new(true) + } + + pub fn as_sql_string(&self) -> String { + format!("ARRAY<{}>", self.element_type.as_sql_string()) Review Comment: The `nullability` is not displayed in the sql string. ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,956 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use bitflags::bitflags; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +bitflags! { +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] + pub struct DataTypeFamily: u32 { + const PREDEFINED = 1 << 0; + const CONSTRUCTED = 1 << 1; + const CHARACTER_STRING = 1 << 2; + const BINARY_STRING = 1 << 3; + const NUMERIC = 1 << 4; + const INTEGER_NUMERIC = 1 << 5; + const EXACT_NUMERIC = 1 << 6; + const APPROXIMATE_NUMERIC = 1 << 7; + const DATETIME = 1 << 8; + const TIME = 1 << 9; + const TIMESTAMP = 1 << 10; + const COLLECTION = 1 << 11; + const EXTENSION = 1 << 12; + } +} + +/// The root of data type. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub enum DataTypeRoot { + Char, + Varchar, + Boolean, + Binary, + Varbinary, + Decimal, + Tinyint, + Smallint, + Integer, + Bigint, + Float, + Double, + Date, + TimeWithoutTimeZone, + TimestampWithoutTimeZone, + TimestampWithLocalTimeZone, + Array, + Multiset, + Map, + Row, +} + +impl DataTypeRoot { + pub fn families(&self) -> DataTypeFamily { + match self { + Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Boolean => DataTypeFamily::PREDEFINED, + Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Decimal => { + DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC + } + Self::Tinyint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Smallint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | 
DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Integer => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Bigint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Float => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Double => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Date => DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME, + Self::TimeWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIME + } + Self::TimestampWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIMESTAMP + } + Self::TimestampWithLocalTimeZone => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::DATETIME + | DataTypeFamily::TIMESTAMP + | DataTypeFamily::EXTENSION + } + Self::Array => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Multiset => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Map => DataTypeFamily::CONSTRUCTED | DataTypeFamily::EXTENSION, + Self::Row => DataTypeFamily::CONSTRUCTED, + } + } +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&mut self, data_type: &DataType) -> R; +} + +/// Data type for paimon table. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +impl Display for DataType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_sql_string()) + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result<Self, Self::Err> { + todo!() + } +} + +#[allow(dead_code)] +impl DataType { + fn new(is_nullable: bool, type_root: DataTypeRoot) -> Self { + Self { + is_nullable, + type_root, + } + } + + /// Returns true if the data type is nullable. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L59> + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /// Returns the root of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L66> + fn get_type_root(&self) -> &DataTypeRoot { + &self.type_root + } + + /// Returns the family of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L186> + fn is(&self, type_root: &DataTypeRoot) -> bool { + &self.type_root == type_root + } + + /// Returns true if the data type is with the family. 
+ /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L195> + fn is_with_family(&self, family: DataTypeFamily) -> bool { + self.type_root.families().contains(family) + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L204> + fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { + type_roots.iter().any(|tr: &DataTypeRoot| self.is(tr)) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L214> + fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { + families + .iter() + .any(|f: &DataTypeFamily| self.is_with_family(f.clone())) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L224> + fn copy(&self, is_nullable: bool) -> Self { + Self { + is_nullable, + type_root: self.type_root.clone(), + } + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L231> + fn copy_with_nullable(&self) -> Self { + self.copy(self.is_nullable) + } + + /// Returns true if the data type is with the family. 
+ /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L240> + fn copy_ignore_nullable(&self) -> Self { + self.copy(false) + } + + fn as_sql_string(&self) -> String { Review Comment: This method should be moved to the subclass, Right? ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,956 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use bitflags::bitflags; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +bitflags! { +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] + pub struct DataTypeFamily: u32 { + const PREDEFINED = 1 << 0; + const CONSTRUCTED = 1 << 1; + const CHARACTER_STRING = 1 << 2; + const BINARY_STRING = 1 << 3; + const NUMERIC = 1 << 4; + const INTEGER_NUMERIC = 1 << 5; + const EXACT_NUMERIC = 1 << 6; + const APPROXIMATE_NUMERIC = 1 << 7; + const DATETIME = 1 << 8; + const TIME = 1 << 9; + const TIMESTAMP = 1 << 10; + const COLLECTION = 1 << 11; + const EXTENSION = 1 << 12; + } +} + +/// The root of data type. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub enum DataTypeRoot { + Char, + Varchar, + Boolean, + Binary, + Varbinary, + Decimal, + Tinyint, + Smallint, + Integer, + Bigint, + Float, + Double, + Date, + TimeWithoutTimeZone, + TimestampWithoutTimeZone, + TimestampWithLocalTimeZone, + Array, + Multiset, + Map, + Row, +} + +impl DataTypeRoot { + pub fn families(&self) -> DataTypeFamily { + match self { + Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Boolean => DataTypeFamily::PREDEFINED, + Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Decimal => { + DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC + } + Self::Tinyint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Smallint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | 
DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Integer => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Bigint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Float => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Double => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Date => DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME, + Self::TimeWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIME + } + Self::TimestampWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIMESTAMP + } + Self::TimestampWithLocalTimeZone => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::DATETIME + | DataTypeFamily::TIMESTAMP + | DataTypeFamily::EXTENSION + } + Self::Array => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Multiset => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Map => DataTypeFamily::CONSTRUCTED | DataTypeFamily::EXTENSION, + Self::Row => DataTypeFamily::CONSTRUCTED, + } + } +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&mut self, data_type: &DataType) -> R; +} + +/// Data type for paimon table. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +impl Display for DataType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_sql_string()) + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result<Self, Self::Err> { + todo!() + } +} + +#[allow(dead_code)] +impl DataType { + fn new(is_nullable: bool, type_root: DataTypeRoot) -> Self { + Self { + is_nullable, + type_root, + } + } + + /// Returns true if the data type is nullable. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L59> + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /// Returns the root of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L66> + fn get_type_root(&self) -> &DataTypeRoot { + &self.type_root + } + + /// Returns the family of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L186> + fn is(&self, type_root: &DataTypeRoot) -> bool { + &self.type_root == type_root + } + + /// Returns true if the data type is with the family. 
+ /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L195> + fn is_with_family(&self, family: DataTypeFamily) -> bool { + self.type_root.families().contains(family) + } + + /// Returns true if the data type is with the family. Review Comment: Incorrect comment ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,956 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use bitflags::bitflags; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +bitflags! { +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] + pub struct DataTypeFamily: u32 { + const PREDEFINED = 1 << 0; + const CONSTRUCTED = 1 << 1; + const CHARACTER_STRING = 1 << 2; + const BINARY_STRING = 1 << 3; + const NUMERIC = 1 << 4; + const INTEGER_NUMERIC = 1 << 5; + const EXACT_NUMERIC = 1 << 6; + const APPROXIMATE_NUMERIC = 1 << 7; + const DATETIME = 1 << 8; + const TIME = 1 << 9; + const TIMESTAMP = 1 << 10; + const COLLECTION = 1 << 11; + const EXTENSION = 1 << 12; + } +} + +/// The root of data type. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub enum DataTypeRoot { + Char, + Varchar, + Boolean, + Binary, + Varbinary, + Decimal, + Tinyint, + Smallint, + Integer, + Bigint, + Float, + Double, + Date, + TimeWithoutTimeZone, + TimestampWithoutTimeZone, + TimestampWithLocalTimeZone, + Array, + Multiset, + Map, + Row, +} + +impl DataTypeRoot { + pub fn families(&self) -> DataTypeFamily { + match self { + Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Boolean => DataTypeFamily::PREDEFINED, + Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Decimal => { + DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC + } + Self::Tinyint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Smallint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | 
DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Integer => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Bigint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Float => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Double => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Date => DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME, + Self::TimeWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIME + } + Self::TimestampWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIMESTAMP + } + Self::TimestampWithLocalTimeZone => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::DATETIME + | DataTypeFamily::TIMESTAMP + | DataTypeFamily::EXTENSION + } + Self::Array => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Multiset => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Map => DataTypeFamily::CONSTRUCTED | DataTypeFamily::EXTENSION, + Self::Row => DataTypeFamily::CONSTRUCTED, + } + } +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&mut self, data_type: &DataType) -> R; +} + +/// Data type for paimon table. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +impl Display for DataType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_sql_string()) + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result<Self, Self::Err> { + todo!() + } +} + +#[allow(dead_code)] +impl DataType { + fn new(is_nullable: bool, type_root: DataTypeRoot) -> Self { + Self { + is_nullable, + type_root, + } + } + + /// Returns true if the data type is nullable. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L59> + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /// Returns the root of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L66> + fn get_type_root(&self) -> &DataTypeRoot { + &self.type_root + } + + /// Returns the family of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L186> + fn is(&self, type_root: &DataTypeRoot) -> bool { + &self.type_root == type_root + } + + /// Returns true if the data type is with the family. 
+ /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L195> + fn is_with_family(&self, family: DataTypeFamily) -> bool { + self.type_root.families().contains(family) + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L204> + fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { + type_roots.iter().any(|tr: &DataTypeRoot| self.is(tr)) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L214> + fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { + families + .iter() + .any(|f: &DataTypeFamily| self.is_with_family(f.clone())) + } + + /// Returns true if the data type is with the family. Review Comment: Incorrect comment ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,956 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use bitflags::bitflags; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; + +bitflags! { +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] + pub struct DataTypeFamily: u32 { + const PREDEFINED = 1 << 0; + const CONSTRUCTED = 1 << 1; + const CHARACTER_STRING = 1 << 2; + const BINARY_STRING = 1 << 3; + const NUMERIC = 1 << 4; + const INTEGER_NUMERIC = 1 << 5; + const EXACT_NUMERIC = 1 << 6; + const APPROXIMATE_NUMERIC = 1 << 7; + const DATETIME = 1 << 8; + const TIME = 1 << 9; + const TIMESTAMP = 1 << 10; + const COLLECTION = 1 << 11; + const EXTENSION = 1 << 12; + } +} + +/// The root of data type. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub enum DataTypeRoot { + Char, + Varchar, + Boolean, + Binary, + Varbinary, + Decimal, + Tinyint, + Smallint, + Integer, + Bigint, + Float, + Double, + Date, + TimeWithoutTimeZone, + TimestampWithoutTimeZone, + TimestampWithLocalTimeZone, + Array, + Multiset, + Map, + Row, +} + +impl DataTypeRoot { + pub fn families(&self) -> DataTypeFamily { + match self { + Self::Char => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Varchar => DataTypeFamily::PREDEFINED | DataTypeFamily::CHARACTER_STRING, + Self::Boolean => DataTypeFamily::PREDEFINED, + Self::Binary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Varbinary => DataTypeFamily::PREDEFINED | DataTypeFamily::BINARY_STRING, + Self::Decimal => { + DataTypeFamily::PREDEFINED | DataTypeFamily::NUMERIC | DataTypeFamily::EXACT_NUMERIC + } + Self::Tinyint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Smallint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Integer => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Bigint => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::INTEGER_NUMERIC + | DataTypeFamily::EXACT_NUMERIC + } + Self::Float => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Double => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::NUMERIC + | DataTypeFamily::APPROXIMATE_NUMERIC + } + Self::Date => DataTypeFamily::PREDEFINED | 
DataTypeFamily::DATETIME, + Self::TimeWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIME + } + Self::TimestampWithoutTimeZone => { + DataTypeFamily::PREDEFINED | DataTypeFamily::DATETIME | DataTypeFamily::TIMESTAMP + } + Self::TimestampWithLocalTimeZone => { + DataTypeFamily::PREDEFINED + | DataTypeFamily::DATETIME + | DataTypeFamily::TIMESTAMP + | DataTypeFamily::EXTENSION + } + Self::Array => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Multiset => DataTypeFamily::CONSTRUCTED | DataTypeFamily::COLLECTION, + Self::Map => DataTypeFamily::CONSTRUCTED | DataTypeFamily::EXTENSION, + Self::Row => DataTypeFamily::CONSTRUCTED, + } + } +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&mut self, data_type: &DataType) -> R; +} + +/// Data type for paimon table. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +impl Display for DataType { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.as_sql_string()) + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result<Self, Self::Err> { + todo!() + } +} + +#[allow(dead_code)] +impl DataType { + fn new(is_nullable: bool, type_root: DataTypeRoot) -> Self { + Self { + is_nullable, + type_root, + } + } + + /// Returns true if the data type is nullable. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L59> + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /// Returns the root of the data type. 
+ /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L66> + fn get_type_root(&self) -> &DataTypeRoot { + &self.type_root + } + + /// Returns the family of the data type. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L186> + fn is(&self, type_root: &DataTypeRoot) -> bool { + &self.type_root == type_root + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L195> + fn is_with_family(&self, family: DataTypeFamily) -> bool { + self.type_root.families().contains(family) + } + + /// Returns true if the data type is with the family. + /// + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L204> + fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { + type_roots.iter().any(|tr: &DataTypeRoot| self.is(tr)) + } + + /// Returns true if the data type is with the family. + /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L214> + fn is_any_of_family(&self, families: &[DataTypeFamily]) -> bool { + families + .iter() + .any(|f: &DataTypeFamily| self.is_with_family(f.clone())) + } + + /// Returns true if the data type is with the family. 
+ /// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L224> + fn copy(&self, is_nullable: bool) -> Self { + Self { + is_nullable, + type_root: self.type_root.clone(), + } + } + + /// Returns true if the data type is with the family. Review Comment: Incorrect comment (this doc line repeats the description of `is_with_family` and does not describe the copy method that follows it). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
