This is an automated email from the ASF dual-hosted git repository.

lzljs3620320 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-rust.git


The following commit(s) were added to refs/heads/main by this push:
     new bbc3d95  feat: Implement ser/de for all types (#49)
bbc3d95 is described below

commit bbc3d95f9a375846b01a94950129a0109908397f
Author: Xuanwo <[email protected]>
AuthorDate: Sun Aug 11 19:48:44 2024 +0800

    feat: Implement ser/de for all types (#49)
---
 crates/paimon/src/spec/types.rs | 453 +++++++++++++++-------------------------
 1 file changed, 171 insertions(+), 282 deletions(-)

diff --git a/crates/paimon/src/spec/types.rs b/crates/paimon/src/spec/types.rs
index 5039caa..d27d9ab 100644
--- a/crates/paimon/src/spec/types.rs
+++ b/crates/paimon/src/spec/types.rs
@@ -18,13 +18,9 @@
 use crate::error::*;
 use crate::spec::DataField;
 use bitflags::bitflags;
-use serde::de::{MapAccess, Visitor};
-use serde::ser::SerializeStruct;
-use serde::{Deserialize, Deserializer, Serialize, Serializer};
-use serde_with::{DeserializeFromStr, SerializeDisplay};
-use std::fmt;
+use serde::{Deserialize, Serialize};
+use serde_with::{serde_as, FromInto, SerializeDisplay};
 use std::fmt::{Debug, Display, Formatter};
-use std::str::FromStr;
 
 bitflags! {
 /// An enumeration of Data type families for clustering {@link DataTypeRoot}s 
into categories.
@@ -136,9 +132,13 @@ impl DataType {
 /// Data type of an array of elements with same subtype.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/ArrayType.java>.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[serde_as]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
 pub struct ArrayType {
+    #[serde(rename = "type")]
+    #[serde_as(as = "FromInto<serde_utils::NullableType<serde_utils::ARRAY>>")]
     nullable: bool,
+    #[serde(rename = "element")]
     element_type: Box<DataType>,
 }
 
@@ -159,88 +159,6 @@ impl ArrayType {
     }
 }
 
-impl Serialize for ArrayType {
-    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, 
S::Error>
-    where
-        S: Serializer,
-    {
-        let mut s = serializer.serialize_struct("ArrayType", 2)?;
-        let typ = if self.nullable {
-            "ARRAY"
-        } else {
-            "ARRAY NOT NULL"
-        };
-        s.serialize_field("type", typ)?;
-        s.serialize_field("element", &self.element_type)?;
-        s.end()
-    }
-}
-
-impl<'de> Deserialize<'de> for ArrayType {
-    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
-    where
-        D: Deserializer<'de>,
-    {
-        #[derive(Deserialize)]
-        #[serde(field_identifier, rename_all = "lowercase")]
-        enum Field {
-            Type,
-            Element,
-        }
-
-        struct ArrayTypeVisitor;
-
-        impl<'de> Visitor<'de> for ArrayTypeVisitor {
-            type Value = ArrayType;
-
-            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result 
{
-                formatter.write_str("ArrayType")
-            }
-
-            fn visit_map<V>(self, mut map: V) -> 
std::result::Result<ArrayType, V::Error>
-            where
-                V: MapAccess<'de>,
-            {
-                let mut nullable = None;
-                let mut element: Option<DataType> = None;
-                while let Some(key) = map.next_key()? {
-                    match key {
-                        Field::Type => {
-                            if nullable.is_some() {
-                                return 
Err(serde::de::Error::duplicate_field("type"));
-                            }
-                            match map.next_value()? {
-                                "ARRAY" => nullable = Some(true),
-                                "ARRAY NOT NULL" => nullable = Some(false),
-                                v => Err(serde::de::Error::invalid_value(
-                                    serde::de::Unexpected::Str(v),
-                                    &"ARRAY or ARRAY NOT NULL",
-                                ))?,
-                            }
-                        }
-                        Field::Element => {
-                            if element.is_some() {
-                                return 
Err(serde::de::Error::duplicate_field("element"));
-                            }
-                            element = Some(map.next_value()?);
-                        }
-                    }
-                }
-
-                Ok(ArrayType {
-                    nullable: nullable.ok_or_else(|| 
serde::de::Error::missing_field("type"))?,
-                    element_type: element
-                        .ok_or_else(|| 
serde::de::Error::missing_field("element"))?
-                        .into(),
-                })
-            }
-        }
-
-        const FIELDS: &[&str] = &["type", "element"];
-        deserializer.deserialize_struct("ArrayType", FIELDS, ArrayTypeVisitor)
-    }
-}
-
 /// BigIntType for paimon.
 ///
 /// Data type of an 8-byte (2^64) signed integer with values from 
-9,223,372,036,854,775,808 to 9,223,372,036,854,775,807.
@@ -347,35 +265,14 @@ impl BinaryType {
 /// Data type of a boolean with a (possibly) three-valued logic of `TRUE`, 
`FALSE`, `UNKNOWN`.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/master/paimon-common/src/release-0.8.2/java/org/apache/paimon/types/BooleanType.java>.
-#[derive(Debug, Clone, PartialEq, Eq, DeserializeFromStr, SerializeDisplay, 
Hash)]
+#[serde_as]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(transparent)]
 pub struct BooleanType {
+    #[serde_as(as = 
"FromInto<serde_utils::NullableType<serde_utils::BOOLEAN>>")]
     nullable: bool,
 }
 
-impl Display for BooleanType {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "BOOLEAN")?;
-        if !self.nullable {
-            write!(f, " NOT NULL")?;
-        }
-        Ok(())
-    }
-}
-
-impl FromStr for BooleanType {
-    type Err = Error;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "BOOLEAN" => Ok(Self { nullable: true }),
-            "BOOLEAN NOT NULL" => Ok(Self { nullable: false }),
-            v => Err(Error::DataTypeInvalid {
-                message: format!("invalid boolean type: {v}"),
-            })?,
-        }
-    }
-}
-
 impl Default for BooleanType {
     fn default() -> Self {
         Self::new()
@@ -459,21 +356,14 @@ impl CharType {
 /// Data type of a date consisting of `year-month-day` with values ranging 
from `0000-01-01` to `9999-12-31`
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/DateType.java>.
-#[derive(Debug, Clone, PartialEq, Hash, Eq, Deserialize, SerializeDisplay)]
+#[serde_as]
+#[derive(Debug, Clone, PartialEq, Hash, Eq, Serialize, Deserialize)]
+#[serde(transparent)]
 pub struct DateType {
+    #[serde_as(as = "FromInto<serde_utils::NullableType<serde_utils::DATE>>")]
     nullable: bool,
 }
 
-impl Display for DateType {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "DATE")?;
-        if !self.nullable {
-            write!(f, " NOT NULL")?;
-        }
-        Ok(())
-    }
-}
-
 impl Default for DateType {
     fn default() -> Self {
         Self::new()
@@ -499,7 +389,7 @@ impl DateType {
 /// Data type of a decimal number with fixed precision and scale.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/DecimalType.java>.
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize, SerializeDisplay, Hash)]
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)]
 pub struct DecimalType {
     nullable: bool,
 
@@ -586,8 +476,11 @@ impl DecimalType {
 /// Data type of an 8-byte double precision floating point number.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/DoubleType.java>.
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize, SerializeDisplay, Hash)]
+#[serde_as]
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)]
+#[serde(transparent)]
 pub struct DoubleType {
+    #[serde_as(as = 
"FromInto<serde_utils::NullableType<serde_utils::DOUBLE>>")]
     nullable: bool,
 }
 
@@ -624,21 +517,14 @@ impl DoubleType {
 /// FloatType for paimon.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/FloatType.java>.
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize, SerializeDisplay, Hash)]
+#[serde_as]
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)]
+#[serde(transparent)]
 pub struct FloatType {
+    #[serde_as(as = "FromInto<serde_utils::NullableType<serde_utils::FLOAT>>")]
     nullable: bool,
 }
 
-impl Display for FloatType {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "FLOAT")?;
-        if !self.nullable {
-            write!(f, " NOT NULL")?;
-        }
-        Ok(())
-    }
-}
-
 impl Default for FloatType {
     fn default() -> Self {
         Self::new()
@@ -664,21 +550,14 @@ impl FloatType {
 /// Data type of a 4-byte (2^32) signed integer with values from 
-2,147,483,648 to 2,147,483,647.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/IntType.java>.
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize, SerializeDisplay, Hash)]
+#[serde_as]
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(transparent)]
 pub struct IntType {
+    #[serde_as(as = 
"FromInto<serde_utils::NullableType<serde_utils::INTEGER>>")]
     nullable: bool,
 }
 
-impl Display for IntType {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "INTEGER")?;
-        if !self.nullable {
-            write!(f, " NOT NULL")?;
-        }
-        Ok(())
-    }
-}
-
 impl Default for IntType {
     fn default() -> Self {
         Self::new()
@@ -775,21 +654,14 @@ impl LocalZonedTimestampType {
 /// Data type of a 2-byte (2^16) signed integer with values from -32,768 to 
32,767.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/SmallIntType.java>.
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize, SerializeDisplay, Hash)]
+#[serde_as]
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)]
+#[serde(transparent)]
 pub struct SmallIntType {
+    #[serde_as(as = 
"FromInto<serde_utils::NullableType<serde_utils::SMALLINT>>")]
     nullable: bool,
 }
 
-impl Display for SmallIntType {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "SMALLINT")?;
-        if !self.nullable {
-            write!(f, " NOT NULL")?;
-        }
-        Ok(())
-    }
-}
-
 impl Default for SmallIntType {
     fn default() -> Self {
         Self::new()
@@ -949,21 +821,14 @@ impl TimestampType {
 /// Data type of a 1-byte signed integer with values from -128 to 127.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/master/paimon-common/src/release-0.8.2/java/org/apache/paimon/types/TinyIntType.java>.
-#[derive(Debug, Clone, PartialEq, Eq, Deserialize, SerializeDisplay, Hash)]
+#[serde_as]
+#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)]
+#[serde(transparent)]
 pub struct TinyIntType {
+    #[serde_as(as = 
"FromInto<serde_utils::NullableType<serde_utils::TINYINT>>")]
     nullable: bool,
 }
 
-impl Display for TinyIntType {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        write!(f, "TINYINT")?;
-        if !self.nullable {
-            write!(f, " NOT NULL")?;
-        }
-        Ok(())
-    }
-}
-
 impl Default for TinyIntType {
     fn default() -> Self {
         Self::new()
@@ -1112,10 +977,15 @@ impl VarCharType {
 /// Data type of an associative array that maps keys `NULL` to values 
(including `NULL`).
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/MapType.java>.
+#[serde_as]
 #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)]
 pub struct MapType {
+    #[serde(rename = "type")]
+    #[serde_as(as = "FromInto<serde_utils::NullableType<serde_utils::MAP>>")]
     nullable: bool,
+    #[serde(rename = "key")]
     key_type: Box<DataType>,
+    #[serde(rename = "value")]
     value_type: Box<DataType>,
 }
 
@@ -1143,9 +1013,13 @@ impl MapType {
 /// elements with a common subtype.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/MultisetType.java>.
+#[serde_as]
 #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)]
 pub struct MultisetType {
+    #[serde(rename = "type")] // written as "MULTISET" or "MULTISET NOT NULL"
+    #[serde_as(as = "FromInto<serde_utils::NullableType<serde_utils::MULTISET>>")]
     nullable: bool,
+    #[serde(rename = "element")]
     element_type: Box<DataType>,
 }
 
@@ -1174,8 +1048,11 @@ impl MultisetType {
 /// column.
 ///
 /// Impl Reference: 
<https://github.com/apache/paimon/blob/release-0.8.2/paimon-common/src/main/java/org/apache/paimon/types/RowType.java>.
+#[serde_as]
 #[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Hash)]
 pub struct RowType {
+    #[serde(rename = "type")]
+    #[serde_as(as = "FromInto<serde_utils::NullableType<serde_utils::ROW>>")]
     nullable: bool,
     fields: Vec<DataField>,
 }
@@ -1194,122 +1071,134 @@ impl RowType {
     }
 }
 
+mod serde_utils {
+    // We use names like `BOOLEAN` by design to avoid conflicts.
+    #![allow(clippy::upper_case_acronyms)]
+
+    use serde::{Deserialize, Deserializer, Serialize, Serializer};
+    use std::marker::PhantomData;
+
+    pub trait DataTypeName {
+        const NAME: &'static str;
+    }
+
+    pub struct BOOLEAN;
+    impl DataTypeName for BOOLEAN {
+        const NAME: &'static str = "BOOLEAN";
+    }
+
+    pub struct ARRAY;
+    impl DataTypeName for ARRAY {
+        const NAME: &'static str = "ARRAY";
+    }
+
+    pub struct DATE;
+    impl DataTypeName for DATE {
+        const NAME: &'static str = "DATE";
+    }
+
+    pub struct DOUBLE;
+    impl DataTypeName for DOUBLE {
+        const NAME: &'static str = "DOUBLE";
+    }
+
+    pub struct FLOAT;
+    impl DataTypeName for FLOAT {
+        const NAME: &'static str = "FLOAT";
+    }
+
+    pub struct INTEGER;
+    impl DataTypeName for INTEGER {
+        const NAME: &'static str = "INTEGER";
+    }
+
+    pub struct SMALLINT;
+    impl DataTypeName for SMALLINT {
+        const NAME: &'static str = "SMALLINT";
+    }
+
+    pub struct TINYINT;
+    impl DataTypeName for TINYINT {
+        const NAME: &'static str = "TINYINT";
+    }
+
+    pub struct MAP;
+    impl DataTypeName for MAP {
+        const NAME: &'static str = "MAP";
+    }
+
+    pub struct MULTISET;
+    impl DataTypeName for MULTISET {
+        const NAME: &'static str = "MULTISET";
+    }
+
+    pub struct ROW;
+    impl DataTypeName for ROW {
+        const NAME: &'static str = "ROW";
+    }
+
+    pub struct NullableType<T: DataTypeName> {
+        nullable: bool,
+        value: PhantomData<T>,
+    }
+
+    impl<T: DataTypeName> From<bool> for NullableType<T> {
+        fn from(value: bool) -> Self {
+            Self {
+                nullable: value,
+                value: PhantomData,
+            }
+        }
+    }
+    impl<T: DataTypeName> From<NullableType<T>> for bool {
+        fn from(value: NullableType<T>) -> Self {
+            value.nullable
+        }
+    }
+
+    impl<T: DataTypeName> Serialize for NullableType<T> {
+        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+        where
+            S: Serializer,
+        {
+            if self.nullable {
+                serializer.serialize_str(T::NAME)
+            } else {
+                serializer.serialize_str(&format!("{} NOT NULL", T::NAME))
+            }
+        }
+    }
+
+    /// Accepts `NAME` (nullable) or `NAME NOT NULL`. TODO: support more edge cases.
+    impl<'de, T: DataTypeName> Deserialize<'de> for NullableType<T> {
+        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+        where
+            D: Deserializer<'de>,
+        {
+            let s = String::deserialize(deserializer)?;
+
+            let (name, nullable) = s.split_once(' ').unwrap_or((s.as_str(), ""));
+
+            if name == T::NAME && nullable.is_empty() {
+                Ok(NullableType::from(true))
+            } else if name == T::NAME && nullable == "NOT NULL" {
+                Ok(NullableType::from(false))
+            } else {
+                let expect = format!("{} or {} NOT NULL", T::NAME, T::NAME);
+                Err(serde::de::Error::invalid_value(
+                    serde::de::Unexpected::Str(s.as_str()),
+                    &expect.as_str(),
+                ))
+            }
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
 
     use super::*;
 
-    #[test]
-    fn test_data_type_to_string() {
-        assert_eq!(BooleanType::with_nullable(true).to_string(), "BOOLEAN");
-        assert_eq!(
-            BooleanType::with_nullable(false).to_string(),
-            "BOOLEAN NOT NULL"
-        );
-        assert_eq!(TinyIntType::with_nullable(true).to_string(), "TINYINT");
-        assert_eq!(
-            TinyIntType::with_nullable(false).to_string(),
-            "TINYINT NOT NULL"
-        );
-        assert_eq!(SmallIntType::with_nullable(true).to_string(), "SMALLINT");
-        assert_eq!(
-            SmallIntType::with_nullable(false).to_string(),
-            "SMALLINT NOT NULL"
-        );
-        assert_eq!(IntType::with_nullable(true).to_string(), "INTEGER");
-        assert_eq!(
-            IntType::with_nullable(false).to_string(),
-            "INTEGER NOT NULL"
-        );
-        assert_eq!(BigIntType::with_nullable(true).to_string(), "BIGINT");
-        assert_eq!(
-            BigIntType::with_nullable(false).to_string(),
-            "BIGINT NOT NULL"
-        );
-        assert_eq!(
-            DecimalType::with_nullable(true, 10, 2).unwrap().to_string(),
-            "DECIMAL(10, 2)"
-        );
-        assert_eq!(
-            DecimalType::with_nullable(false, 10, 2)
-                .unwrap()
-                .to_string(),
-            "DECIMAL(10, 2) NOT NULL"
-        );
-        assert_eq!(DoubleType::with_nullable(true).to_string(), "DOUBLE");
-        assert_eq!(
-            DoubleType::with_nullable(false).to_string(),
-            "DOUBLE NOT NULL"
-        );
-        assert_eq!(FloatType::with_nullable(true).to_string(), "FLOAT");
-        assert_eq!(
-            FloatType::with_nullable(false).to_string(),
-            "FLOAT NOT NULL"
-        );
-        assert_eq!(
-            BinaryType::with_nullable(true, 10).unwrap().to_string(),
-            "BINARY(10)"
-        );
-        assert_eq!(
-            BinaryType::with_nullable(false, 10).unwrap().to_string(),
-            "BINARY(10) NOT NULL"
-        );
-        assert_eq!(
-            VarBinaryType::try_new(true, 10).unwrap().to_string(),
-            "VARBINARY(10)"
-        );
-        assert_eq!(
-            VarBinaryType::try_new(false, 10).unwrap().to_string(),
-            "VARBINARY(10) NOT NULL"
-        );
-        assert_eq!(
-            CharType::with_nullable(true, 10).unwrap().to_string(),
-            "CHAR(10)"
-        );
-        assert_eq!(
-            CharType::with_nullable(false, 10).unwrap().to_string(),
-            "CHAR(10) NOT NULL"
-        );
-        assert_eq!(
-            VarCharType::with_nullable(true, 10).unwrap().to_string(),
-            "VARCHAR(10)"
-        );
-        assert_eq!(
-            VarCharType::with_nullable(false, 10).unwrap().to_string(),
-            "VARCHAR(10) NOT NULL"
-        );
-        assert_eq!(DateType::with_nullable(true).to_string(), "DATE");
-        assert_eq!(DateType::with_nullable(false).to_string(), "DATE NOT 
NULL");
-        assert_eq!(
-            LocalZonedTimestampType::with_nullable(true, 6)
-                .unwrap()
-                .to_string(),
-            "TIMESTAMP WITH LOCAL TIME ZONE(6)"
-        );
-        assert_eq!(
-            LocalZonedTimestampType::with_nullable(false, 6)
-                .unwrap()
-                .to_string(),
-            "TIMESTAMP WITH LOCAL TIME ZONE(6) NOT NULL"
-        );
-        assert_eq!(
-            TimeType::with_nullable(true, 6).unwrap().to_string(),
-            "TIME(6)"
-        );
-        assert_eq!(
-            TimeType::with_nullable(false, 6).unwrap().to_string(),
-            "TIME(6) NOT NULL"
-        );
-        assert_eq!(
-            TimestampType::with_nullable(false, 6).unwrap().to_string(),
-            "TIMESTAMP(6) NOT NULL"
-        );
-        assert_eq!(
-            TimestampType::with_nullable(true, 6).unwrap().to_string(),
-            "TIMESTAMP(6)"
-        );
-    }
-
     /// TODO: replace expect with exist fixture.
     #[test]
     fn test_data_type_serialize() {

Reply via email to