Xuanwo commented on code in PR #18: URL: https://github.com/apache/paimon-rust/pull/18#discussion_r1681153598
########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,1463 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; +use std::collections::hash_map::DefaultHasher; +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; + +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DataTypeFamily { + Predefined, + Constructed, + CharacterString, + BinaryString, + Numeric, + IntegerNumeric, + ExactNumeric, + ApproximateNumeric, + DateTime, + Time, + Timestamp, + Collection, + Extension, +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&self, _type: &DataType) -> R; +} + +/// The root of data type. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[repr(u8)] +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all("camelCase"))] +pub enum DataTypeRoot { Review Comment: Are we implementing a bitset here? How about using [`bitflags`](https://docs.rs/bitflags/latest/bitflags/) instead? ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,1463 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; +use std::collections::hash_map::DefaultHasher; +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; + +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DataTypeFamily { + Predefined, + Constructed, + CharacterString, + BinaryString, + Numeric, + IntegerNumeric, + ExactNumeric, + ApproximateNumeric, + DateTime, + Time, + Timestamp, + Collection, + Extension, +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&self, _type: &DataType) -> R; +} + +/// The root of data type. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[repr(u8)] +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all("camelCase"))] +pub enum DataTypeRoot { + Char(DataTypeFamily::Predefined, DataTypeFamily::CharacterString), + + Varchar(DataTypeFamily::Predefined, DataTypeFamily::CharacterString), + + Boolean(DataTypeFamily::Predefined), + + Binary(DataTypeFamily::Predefined, DataTypeFamily::BinaryString), + + VarBinary(DataTypeFamily::Predefined, DataTypeFamily::BinaryString), + + Decimal( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ExactNumeric, + ), + + TinyInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + SmallInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + Integer( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + BigInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + Float( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ApproximateNumeric, + ), + + 
Double( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ApproximateNumeric, + ), + + Date(DataTypeFamily::Predefined, DataTypeFamily::DateTime), + + TimeWithoutTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Time, + ), + + TimestampWithoutTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Timestamp, + ), + + TimestampWithLocalTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Timestamp, + DataTypeFamily::Extension, + ), + + Array(DataTypeFamily::Constructed, DataTypeFamily::Collection), + + Multiset(DataTypeFamily::Constructed, DataTypeFamily::Collection), + + Map(DataTypeFamily::Constructed, DataTypeFamily::Extension), + + Row(DataTypeFamily::Constructed), +} + +struct DataTypeRoot { + families: HashSet<DataTypeFamily>, +} + +impl DataTypeRoot { + fn new(families: &[DataTypeFamily]) -> Self { + let set: HashSet<DataTypeFamily> = families.iter().cloned().collect::<HashSet<_>>(); + DataTypeRoot { families: set } + } + + pub fn get_families(&self) -> &HashSet<DataTypeFamily> { + &self.families + } +} + +/// Data type for paimon table. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +pub trait DataTypeTrait { Review Comment: Question: Do we need this trait because we want users to be able to define their own data types? ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,1463 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; +use std::collections::hash_map::DefaultHasher; +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; + +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DataTypeFamily { + Predefined, + Constructed, + CharacterString, + BinaryString, + Numeric, + IntegerNumeric, + ExactNumeric, + ApproximateNumeric, + DateTime, + Time, + Timestamp, + Collection, + Extension, +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&self, _type: &DataType) -> R; +} + +/// The root of data type. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[repr(u8)] +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all("camelCase"))] +pub enum DataTypeRoot { + Char(DataTypeFamily::Predefined, DataTypeFamily::CharacterString), + + Varchar(DataTypeFamily::Predefined, DataTypeFamily::CharacterString), + + Boolean(DataTypeFamily::Predefined), + + Binary(DataTypeFamily::Predefined, DataTypeFamily::BinaryString), + + VarBinary(DataTypeFamily::Predefined, DataTypeFamily::BinaryString), + + Decimal( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ExactNumeric, + ), + + TinyInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + SmallInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + Integer( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + BigInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + Float( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ApproximateNumeric, + ), + + Double( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ApproximateNumeric, + ), + + Date(DataTypeFamily::Predefined, DataTypeFamily::DateTime), + + TimeWithoutTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Time, + ), + + TimestampWithoutTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Timestamp, + ), + + TimestampWithLocalTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Timestamp, + DataTypeFamily::Extension, + ), + + 
Array(DataTypeFamily::Constructed, DataTypeFamily::Collection), + + Multiset(DataTypeFamily::Constructed, DataTypeFamily::Collection), + + Map(DataTypeFamily::Constructed, DataTypeFamily::Extension), + + Row(DataTypeFamily::Constructed), +} + +struct DataTypeRoot { + families: HashSet<DataTypeFamily>, +} + +impl DataTypeRoot { + fn new(families: &[DataTypeFamily]) -> Self { + let set: HashSet<DataTypeFamily> = families.iter().cloned().collect::<HashSet<_>>(); + DataTypeRoot { families: set } + } + + pub fn get_families(&self) -> &HashSet<DataTypeFamily> { + &self.families + } +} + +/// Data type for paimon table. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +pub trait DataTypeTrait { + /** Returns whether a value of this type can be {@code null}. */ Review Comment: Hi, please use `///` for comments. ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,1463 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use crate::error::Error; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; +use std::collections::hash_map::DefaultHasher; +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; + +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DataTypeFamily { + Predefined, + Constructed, + CharacterString, + BinaryString, + Numeric, + IntegerNumeric, + ExactNumeric, + ApproximateNumeric, + DateTime, + Time, + Timestamp, + Collection, + Extension, +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&self, _type: &DataType) -> R; +} + +/// The root of data type. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[repr(u8)] +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all("camelCase"))] +pub enum DataTypeRoot { + Char(DataTypeFamily::Predefined, DataTypeFamily::CharacterString), + + Varchar(DataTypeFamily::Predefined, DataTypeFamily::CharacterString), + + Boolean(DataTypeFamily::Predefined), + + Binary(DataTypeFamily::Predefined, DataTypeFamily::BinaryString), + + VarBinary(DataTypeFamily::Predefined, DataTypeFamily::BinaryString), + + Decimal( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ExactNumeric, + ), + + TinyInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + SmallInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + Integer( + DataTypeFamily::Predefined, + 
DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + BigInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + Float( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ApproximateNumeric, + ), + + Double( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ApproximateNumeric, + ), + + Date(DataTypeFamily::Predefined, DataTypeFamily::DateTime), + + TimeWithoutTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Time, + ), + + TimestampWithoutTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Timestamp, + ), + + TimestampWithLocalTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Timestamp, + DataTypeFamily::Extension, + ), + + Array(DataTypeFamily::Constructed, DataTypeFamily::Collection), + + Multiset(DataTypeFamily::Constructed, DataTypeFamily::Collection), + + Map(DataTypeFamily::Constructed, DataTypeFamily::Extension), + + Row(DataTypeFamily::Constructed), +} + +struct DataTypeRoot { + families: HashSet<DataTypeFamily>, +} + +impl DataTypeRoot { + fn new(families: &[DataTypeFamily]) -> Self { + let set: HashSet<DataTypeFamily> = families.iter().cloned().collect::<HashSet<_>>(); + DataTypeRoot { families: set } + } + + pub fn get_families(&self) -> &HashSet<DataTypeFamily> { + &self.families + } +} + +/// Data type for paimon table. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +pub trait DataTypeTrait { + /** Returns whether a value of this type can be {@code null}. 
*/ + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /** + * Returns the root of this type. It is an essential description without additional parameters. + */ + fn get_type_root(&self) -> &DataTypeRoot { + self.type_root + } + + /** + * Returns whether the root of the type equals to the {@code typeRoot} or not. + * + * @param typeRoot The root type to check against for equality + */ + fn is(&self, type_root: &DataTypeRoot) -> bool { + self.type_root == type_root + } + + /** + * Returns whether the family type of the type equals to the {@code family} or not. + * + * @param family The family type to check against for equality + */ + fn is_with_family(&self, family: &DataTypeFamily) -> bool { + self.type_root.families.contains(family) + } + + /** + * Returns whether the root of the type equals to at least on of the {@code typeRoots} or not. + * + * @param typeRoots The root types to check against for equality + */ + fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { + type_roots.iter().any(|tr| self.is(tr)) + } + + /** + * Returns whether the root of the type is part of at least one family of the {@code typeFamily} + * or not. + * + * @param typeFamilies The families to check against for equality + */ + fn is_any_of_with_family(&self, type_families: &[DataTypeFamily]) -> bool { + type_families.iter().any(|tf| self.families.contains(tf)) + } + + /** + * Returns a deep copy of this type with possibly different nullability. + * + * @param isNullable the intended nullability of the copied type + * @return a deep copy + */ + fn copy(&self, is_nullable: bool) -> Self; + + /** + * Returns a deep copy of this type. It requires an implementation of {@link #copy(boolean)}. + * + * @return a deep copy + */ + fn copy_with_nullable(&self) -> Self { + self.copy(self.is_nullable) + } + + /** + * Compare two data types without nullable. 
+ * + * @param o the target data type + */ + fn copy_ignore_nullable(&self) -> Self { + self.copy(false) + } + + /** + * Compare two data types without nullable. + * + * @param o the target data type + */ + fn equals_ignore_nullable(&self, other: &Self) -> bool { + self.copy(true) == other.copy(true) + } + + fn equals(&self, other: &Self) -> bool { + self.is_nullable == other.is_nullable + && self.type_root == other.type_root + && self.families == other.families + } + + fn hash_code<T: Hash, U: Hash>(t: &T, u: &U) -> u64 { + let mut s: DefaultHasher = DefaultHasher::new(); + t.hash(&mut s); + u.hash(&mut s); + s.finish() + } + + /** + * Returns a string that summarizes this type as SQL standard string for printing to a console. + * An implementation might shorten long names or skips very specific properties. + */ + fn as_sql_string(&self) -> String; + + fn serialize_json( + &self, + writer: &mut impl std::io::Write, + ) -> Result<(), serde_json::Error> { + to_writer(writer, &Value::String(self.as_sql_string()))?; + Ok(()) + } + + fn with_nullability(&self, format: &str, params: Vec<&dyn ToString>) -> String { + if !self.is_nullable { + format!( + "{} NOT NULL", + format( + format, + params + .into_iter() + .map(|p| p.to_string()) + .collect::<Vec<_>>() + .join(", ") + ) + ) + } else { + format( + format, + params + .into_iter() + .map(|p| p.to_string()) + .collect::<Vec<_>>() + .join(", "), + ) + } + } + + fn to_string(&self) -> String { + self.as_sql_string() + } + + fn accept<R>(&self, visitor: &dyn DataTypeVisitor<R>) -> R; + + fn collect_field_ids(&self, field_ids: &mut HashSet<i32>) {} + + fn not_null(&self) -> Self { + self.copy(false) + } + + fn nullable(&self) -> Self { + self.copy(true) + } +} + +impl DataTypeTrait for DataType { + fn copy(&self, is_nullable: bool) -> Self { + panic!("Unsupported copy operation."); + } + + fn as_sql_string(&self) -> String { + panic!("Unsupported as_sql_string operation."); + } + + fn accept<R>(&self, visitor: &dyn 
DataTypeVisitor<R>) -> R { + panic!("Unsupported accept operation."); + } +} + +impl DataType { + pub fn new(is_nullable: bool, root: DataTypeRoot) -> Box<dyn DataType> { + DataType { + is_nullable, + type_root: root, + } + } +} + +impl Display for DataType { + fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result<Self, Self::Err> { + todo!() + } +} + +/// ArrayType for paimon. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/ArrayType.java>. +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ArrayType { + pub element_type: DataType, +} + +impl DataTypeTrait for ArrayType { + fn copy(&self, is_nullable: bool) -> Self { + ArrayType::new(is_nullable, self.element_type) + } + + fn as_sql_string(&self) -> String { + format!(Self::FORMAT.replace("%s", &self.get_element_type().as_sql_string())) + + if self.data_type.is_nullable { + " NULL" + } else { + "" + } + } + + fn serialize_json<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut state = serializer.serialize_struct("ArrayType", 2)?; + state.serialize_field( + "type", + if self.data_type.is_nullable { + "ARRAY" + } else { + "ARRAY NOT NULL" + }, + )?; + state.serialize_field("element", &self.get_element_type())?; + state.end() + } + + fn equals(&self, other: &Self) -> bool { Review Comment: Rust doesn't implement `equals` or `hash` in this way. We need to use `Hash` and `Eq`/`PartialEq`. ########## crates/paimon/src/spec/types.rs: ########## @@ -0,0 +1,1463 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::error::Error; +use serde::{Deserialize, Serialize}; +use std::fmt::{Display, Formatter}; +use std::str::FromStr; +use std::collections::hash_map::DefaultHasher; +use std::collections::HashSet; +use std::hash::{Hash, Hasher}; + +/// An enumeration of Data type families for clustering {@link DataTypeRoot}s into categories. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DataTypeFamily.java> +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DataTypeFamily { + Predefined, + Constructed, + CharacterString, + BinaryString, + Numeric, + IntegerNumeric, + ExactNumeric, + ApproximateNumeric, + DateTime, + Time, + Timestamp, + Collection, + Extension, +} + +/// A visitor that can visit different data types. +pub trait DataTypeVisitor<R> { + fn visit(&self, _type: &DataType) -> R; +} + +/// The root of data type. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataTypeRoot.java#L49> +#[repr(u8)] +#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] +#[serde(rename_all("camelCase"))] +pub enum DataTypeRoot { + Char(DataTypeFamily::Predefined, DataTypeFamily::CharacterString), + + Varchar(DataTypeFamily::Predefined, DataTypeFamily::CharacterString), + + Boolean(DataTypeFamily::Predefined), + + Binary(DataTypeFamily::Predefined, DataTypeFamily::BinaryString), + + VarBinary(DataTypeFamily::Predefined, DataTypeFamily::BinaryString), + + Decimal( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ExactNumeric, + ), + + TinyInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + SmallInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + Integer( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + BigInt( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::IntegerNumeric, + DataTypeFamily::ExactNumeric, + ), + + Float( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ApproximateNumeric, + ), + + Double( + DataTypeFamily::Predefined, + DataTypeFamily::Numeric, + DataTypeFamily::ApproximateNumeric, + ), + + Date(DataTypeFamily::Predefined, DataTypeFamily::DateTime), + + TimeWithoutTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Time, + ), + + TimestampWithoutTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Timestamp, + ), + + TimestampWithLocalTimeZone( + DataTypeFamily::Predefined, + DataTypeFamily::DateTime, + DataTypeFamily::Timestamp, + DataTypeFamily::Extension, + ), + + 
Array(DataTypeFamily::Constructed, DataTypeFamily::Collection), + + Multiset(DataTypeFamily::Constructed, DataTypeFamily::Collection), + + Map(DataTypeFamily::Constructed, DataTypeFamily::Extension), + + Row(DataTypeFamily::Constructed), +} + +struct DataTypeRoot { + families: HashSet<DataTypeFamily>, +} + +impl DataTypeRoot { + fn new(families: &[DataTypeFamily]) -> Self { + let set: HashSet<DataTypeFamily> = families.iter().cloned().collect::<HashSet<_>>(); + DataTypeRoot { families: set } + } + + pub fn get_families(&self) -> &HashSet<DataTypeFamily> { + &self.families + } +} + +/// Data type for paimon table. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/DataType.java#L45> +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct DataType { + is_nullable: bool, + type_root: DataTypeRoot, +} + +pub trait DataTypeTrait { + /** Returns whether a value of this type can be {@code null}. */ + fn is_nullable(&self) -> bool { + self.is_nullable + } + + /** + * Returns the root of this type. It is an essential description without additional parameters. + */ + fn get_type_root(&self) -> &DataTypeRoot { + self.type_root + } + + /** + * Returns whether the root of the type equals to the {@code typeRoot} or not. + * + * @param typeRoot The root type to check against for equality + */ + fn is(&self, type_root: &DataTypeRoot) -> bool { + self.type_root == type_root + } + + /** + * Returns whether the family type of the type equals to the {@code family} or not. + * + * @param family The family type to check against for equality + */ + fn is_with_family(&self, family: &DataTypeFamily) -> bool { + self.type_root.families.contains(family) + } + + /** + * Returns whether the root of the type equals to at least on of the {@code typeRoots} or not. 
+ * + * @param typeRoots The root types to check against for equality + */ + fn is_any_of(&self, type_roots: &[DataTypeRoot]) -> bool { + type_roots.iter().any(|tr| self.is(tr)) + } + + /** + * Returns whether the root of the type is part of at least one family of the {@code typeFamily} + * or not. + * + * @param typeFamilies The families to check against for equality + */ + fn is_any_of_with_family(&self, type_families: &[DataTypeFamily]) -> bool { + type_families.iter().any(|tf| self.families.contains(tf)) + } + + /** + * Returns a deep copy of this type with possibly different nullability. + * + * @param isNullable the intended nullability of the copied type + * @return a deep copy + */ + fn copy(&self, is_nullable: bool) -> Self; + + /** + * Returns a deep copy of this type. It requires an implementation of {@link #copy(boolean)}. + * + * @return a deep copy + */ + fn copy_with_nullable(&self) -> Self { + self.copy(self.is_nullable) + } + + /** + * Compare two data types without nullable. + * + * @param o the target data type + */ + fn copy_ignore_nullable(&self) -> Self { + self.copy(false) + } + + /** + * Compare two data types without nullable. + * + * @param o the target data type + */ + fn equals_ignore_nullable(&self, other: &Self) -> bool { + self.copy(true) == other.copy(true) + } + + fn equals(&self, other: &Self) -> bool { + self.is_nullable == other.is_nullable + && self.type_root == other.type_root + && self.families == other.families + } + + fn hash_code<T: Hash, U: Hash>(t: &T, u: &U) -> u64 { + let mut s: DefaultHasher = DefaultHasher::new(); + t.hash(&mut s); + u.hash(&mut s); + s.finish() + } + + /** + * Returns a string that summarizes this type as SQL standard string for printing to a console. + * An implementation might shorten long names or skips very specific properties. 
+ */ + fn as_sql_string(&self) -> String; + + fn serialize_json( + &self, + writer: &mut impl std::io::Write, + ) -> Result<(), serde_json::Error> { + to_writer(writer, &Value::String(self.as_sql_string()))?; + Ok(()) + } + + fn with_nullability(&self, format: &str, params: Vec<&dyn ToString>) -> String { + if !self.is_nullable { + format!( + "{} NOT NULL", + format( + format, + params + .into_iter() + .map(|p| p.to_string()) + .collect::<Vec<_>>() + .join(", ") + ) + ) + } else { + format( + format, + params + .into_iter() + .map(|p| p.to_string()) + .collect::<Vec<_>>() + .join(", "), + ) + } + } + + fn to_string(&self) -> String { + self.as_sql_string() + } + + fn accept<R>(&self, visitor: &dyn DataTypeVisitor<R>) -> R; + + fn collect_field_ids(&self, field_ids: &mut HashSet<i32>) {} + + fn not_null(&self) -> Self { + self.copy(false) + } + + fn nullable(&self) -> Self { + self.copy(true) + } +} + +impl DataTypeTrait for DataType { + fn copy(&self, is_nullable: bool) -> Self { + panic!("Unsupported copy operation."); + } + + fn as_sql_string(&self) -> String { + panic!("Unsupported as_sql_string operation."); + } + + fn accept<R>(&self, visitor: &dyn DataTypeVisitor<R>) -> R { + panic!("Unsupported accept operation."); + } +} + +impl DataType { + pub fn new(is_nullable: bool, root: DataTypeRoot) -> Box<dyn DataType> { + DataType { + is_nullable, + type_root: root, + } + } +} + +impl Display for DataType { + fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { + todo!() + } +} + +impl FromStr for DataType { + type Err = Error; + + fn from_str(_: &str) -> Result<Self, Self::Err> { + todo!() + } +} + +/// ArrayType for paimon. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/ArrayType.java>. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ArrayType { + pub element_type: DataType, +} + +impl DataTypeTrait for ArrayType { + fn copy(&self, is_nullable: bool) -> Self { + ArrayType::new(is_nullable, self.element_type) + } + + fn as_sql_string(&self) -> String { + format!(Self::FORMAT.replace("%s", &self.get_element_type().as_sql_string())) + + if self.data_type.is_nullable { + " NULL" + } else { + "" + } + } + + fn serialize_json<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut state = serializer.serialize_struct("ArrayType", 2)?; + state.serialize_field( + "type", + if self.data_type.is_nullable { + "ARRAY" + } else { + "ARRAY NOT NULL" + }, + )?; + state.serialize_field("element", &self.get_element_type())?; + state.end() + } + + fn equals(&self, other: &Self) -> bool { + self.is_nullable == other.is_nullable && self.element_type == other.element_type + } + + fn hash_code<H: Hasher>(&self, state: &mut H) { + self.is_nullable.hash(state); + self.element_type.hash(state); + } + + fn accept<R>(&self, visitor: &dyn DataTypeVisitor<R>) -> R { + visitor.visit(self) + } +} + +impl ArrayType { + pub const FORMAT: &str = "ARRAY<%s>"; + + pub fn new(is_nullable: bool, element_type: DataType) -> Self { + ArrayType::new_with_result(is_nullable, element_type).unwrap() + } + + pub fn new_with_result( + is_nullable: bool, + element_type: DataType, + ) -> Result<Self, &'static str> { + if None(element_type) { + Err("Element type must not be null.") + } else { + Ok(ArrayType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::ARRAY, + }, + }) + } + } + + pub fn with_element_type(element_type: DataType) -> Self { + ArrayType::new(element_type, true) + } + + pub fn get_element_type(&self) -> &DataType { + &self.element_type + } +} + +/// BigIntType for paimon. 
+/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/BigIntType.java>. +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct BigIntType { + pub element_type: DataType, +} + +impl DataTypeTrait for BigIntType { + fn copy(&self, is_nullable: bool) -> Self { + BigIntType::new(is_nullable) + } + + fn as_sql_string(&self) -> String { + Self::FORMAT.replace("%s", "") + if self.is_nullable { " NULL" } else { "" } + } + + fn accept<R>(&self, visitor: &dyn DataTypeVisitor<R>) -> R { + visitor.visit(self) + } +} + +impl BigIntType { + pub const FORMAT: &str = "BIGINT"; + + pub fn new(is_nullable: bool) -> Self { + BigIntType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::BigInt, + }, + } + } + + pub fn default() -> Self { + BigIntType::new(true) + } +} + +/// BinaryType for paimon. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/BinaryType.java>. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct BinaryType { + pub element_type: DataType, + length: usize, +} + +impl DataTypeTrait for BinaryType { + fn copy(&self, is_nullable: bool) -> Self { + BinaryType::new(is_nullable, self.length) + } + + fn as_sql_string(&self) -> String { + format!(BinaryType::FORMAT, self.length) + if self.is_nullable { " NULL" } else { "" } + } + + fn accept<R>(&self, visitor: &dyn DataTypeVisitor<R>) -> R { + visitor.visit(self) + } +} + +impl BinaryType { + pub const MIN_LENGTH: usize = 1; + + pub const MAX_LENGTH: usize = isize::MAX as usize; + + pub const DEFAULT_LENGTH: usize = 1; + + pub const FORMAT: &str = "BINARY({})"; + + pub fn new(is_nullable: bool, length: usize) -> Self { + BinaryType::new_with_result(is_nullable, length).unwrap() + } + + pub fn new_with_result(is_nullable: bool, length: usize) -> Result<Self, &'static str> { + if length < BinaryType::MIN_LENGTH { + Err("Binary string length must be at least 1.") + } else { + Ok(BinaryType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::BINARY, + }, + length, + }) + } + } + + pub fn with_length(length: usize) -> Self { + BinaryType::new(true, length) + } + + pub fn default() -> Self { + BinaryType::with_length(BinaryType::DEFAULT_LENGTH).unwrap() + } + + pub fn get_length(&self) -> usize { + self.length + } +} + +/// BooleanType for paimon. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/BooleanType.java>. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct BooleanType { + pub element_type: DataType, +} + +impl DataTypeTrait for BooleanType { + fn copy(&self, is_nullable: bool) -> Self { + BooleanType::new(is_nullable) + } + + fn as_sql_string(&self) -> String { + format!( + "{}{}", + BooleanType::FORMAT, + if self.is_nullable { " NULL" } else { "" } + ) + } + + fn accept<R>(&self, visitor: &dyn DataTypeVisitor<R>) -> R { + visitor.visit(self) + } +} + +impl BooleanType { + pub const FORMAT: &str = "BOOLEAN"; + + pub fn new(is_nullable: bool) -> Self { + BooleanType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::BOOLEAN, + }, + } + } + + pub fn default() -> Self { + BooleanType::new(true) + } +} + +/// CharType for paimon. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/db8bcd7fdd9c2705435d2ab1d2341c52d1f67ee5/paimon-common/src/main/java/org/apache/paimon/types/CharType.java>. +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct CharType { + element_type: DataType, + length: usize, +} + +impl DataTypeTrait for CharType { + fn copy(&self, is_nullable: bool) -> Self { + CharType::new(is_nullable, self.length) + } + + /// Returns the SQL string representation of this type. 
+ fn as_sql_string(&self) -> String { + format!( + Self::FORMAT.replace("%d", &self.get_length().to_string()) + "{}", + if self.data_type.is_nullable { + " NULL" + } else { + "" + } + ) + } + + fn equals(&self, other: &Self) -> bool { + if self == other { + return true; + } + + self.is_nullable == other.is_nullable && self.length == other.length + } + + fn hash_code<H: Hasher>(&self, state: &mut H) { + let mut hasher: DefaultHasher = DefaultHasher::new(); + self.is_nullable.hash(&mut hasher); + hasher.finish().hash(state); + self.length.hash(state); + } + + fn accept<R>(&self, visitor: &dyn DataTypeVisitor<R>) -> R { + visitor.visit(self) + } +} + +impl CharType { + pub const DEFAULT_LENGTH: usize = 1; + + pub const MIN_LENGTH: usize = 1; + + pub const MAX_LENGTH: usize = 255; + + pub const FORMAT: &str = "CHAR(%d)"; + + pub fn new(is_nullable: bool, length: usize) -> Self { + CharType::new(is_nullable, length).unwrap() + } + + pub fn new(is_nullable: bool, length: usize) -> Result<Self, &'static str> { + if length < Self::MIN_LENGTH || length > Self::MAX_LENGTH { + Err("Character string length must be between 1 and 255 (both inclusive).") + } else { + Ok(CharType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::Char, + }, + length, + }) + } + } + + pub fn default() -> Self { + CharType::new(Self::DEFAULT_LENGTH, false) + } + + pub fn get_length(&self) -> usize { + &self.length + } +} + +/// DateType for paimon. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DateType.java>. 
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct DateType { + element_type: DataType, +} + +impl DataTypeTrait for DateType { + fn copy(&self, is_nullable: bool) -> Self { + DateType::new(is_nullable) + } + + fn as_sql_string(&self) -> String { + format!( + "{}{}", + DateType::FORMAT, + if self.is_nullable { " NULL" } else { "" } + ) + } + + fn accept<R>(&self, visitor: &dyn DataTypeVisitor<R>) -> R { + visitor.visit(self) + } +} + +impl DateType { + pub const FORMAT: &str = "DATE"; + + pub fn new(is_nullable: bool) -> Self { + DateType { + element_type: DataType { + is_nullable, + type_root: DataTypeRoot::Date, + }, + } + } + + pub fn default() -> Self { + DateType::new(true) + } +} + +/// DecimalType for paimon. +/// +/// Impl Reference: <https://github.com/apache/paimon/blob/master/paimon-common/src/main/java/org/apache/paimon/types/DecimalType.java>. +#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)] +pub struct DecimalType { + element_type: DataType, + precision: u32, Review Comment: How about using `rust_decimal`: https://docs.rs/rust_decimal/latest/rust_decimal/ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
