sunchao commented on a change in pull request #9705:
URL: https://github.com/apache/arrow/pull/9705#discussion_r599288806



##########
File path: rust/parquet/src/arrow/schema.rs
##########
@@ -1508,18 +1514,22 @@ mod tests {
         message test_schema {
             REQUIRED BOOLEAN boolean;
             REQUIRED INT32   int8  (INT_8);
+            REQUIRED INT32   uint8  (INTEGER(8,false));

Review comment:
       nit: extra space after `unit8`?

##########
File path: rust/parquet/src/schema/parser.rs
##########
@@ -281,19 +329,142 @@ impl<'a> Parser<'a> {
             .ok_or_else(|| general_err!("Expected name, found None"))?;
 
         // Parse converted type
-        let (converted_type, precision, scale) = if let Some("(") = 
self.tokenizer.next()
+        let (logical_type, converted_type, precision, scale) = if let 
Some("(") =
+            self.tokenizer.next()
         {
-            let tpe = self
+            let (mut logical, mut converted) = self
                 .tokenizer
                 .next()
-                .ok_or_else(|| general_err!("Expected converted type, found 
None"))
-                .and_then(|v| v.to_uppercase().parse::<ConvertedType>())?;
+                .ok_or_else(|| {
+                    general_err!("Expected logical or converted type, found 
None")
+                })
+                .and_then(|v| {
+                    let upper = v.to_uppercase();
+                    let logical = upper.parse::<LogicalType>();
+                    match logical {
+                        Ok(logical) => Ok((
+                            Some(logical.clone()),
+                            ConvertedType::from(Some(logical)),
+                        )),
+                        Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
+                    }
+                })?;
 
             // Parse precision and scale for decimals
             let mut precision: i32 = -1;
             let mut scale: i32 = -1;
 
-            if tpe == ConvertedType::DECIMAL {
+            // Parse the concrete logical type
+            if let Some(tpe) = &logical {
+                match tpe {
+                    LogicalType::DECIMAL(_) => {
+                        if let Some("(") = self.tokenizer.next() {
+                            precision = parse_i32(
+                                self.tokenizer.next(),
+                                "Expected precision, found None",
+                                "Failed to parse precision for DECIMAL type",
+                            )?;
+                            if let Some(",") = self.tokenizer.next() {
+                                scale = parse_i32(
+                                    self.tokenizer.next(),
+                                    "Invalid boolean found",
+                                    "Failure to parse timezone info for TIME 
type",

Review comment:
       I think this error message and the previous one are incorrect.

##########
File path: rust/parquet/src/schema/parser.rs
##########
@@ -222,18 +258,29 @@ impl<'a> Parser<'a> {
             .next()
             .ok_or_else(|| general_err!("Expected name, found None"))?;
 
-        // Parse converted type if exists
-        let converted_type = if let Some("(") = self.tokenizer.next() {
+        // Parse logical or converted type if exists
+        let (logical_type, converted_type) = if let Some("(") = 
self.tokenizer.next() {
             let tpe = self
                 .tokenizer
                 .next()
                 .ok_or_else(|| general_err!("Expected converted type, found 
None"))
-                .and_then(|v| v.to_uppercase().parse::<ConvertedType>())?;
+                .and_then(|v| {
+                    // Try logical type first
+                    let upper = v.to_uppercase();
+                    let logical = upper.parse::<LogicalType>();
+                    match logical {
+                        Ok(logical) => Ok((
+                            Some(logical.clone()),
+                            ConvertedType::from(Some(logical)),

Review comment:
       Probably not very related: it's a bit strange that `ConvertedType::from` 
takes an option rather than just the logical type. The latter may be more 
intuitive.

##########
File path: rust/parquet/src/schema/parser.rs
##########
@@ -281,19 +329,142 @@ impl<'a> Parser<'a> {
             .ok_or_else(|| general_err!("Expected name, found None"))?;
 
         // Parse converted type
-        let (converted_type, precision, scale) = if let Some("(") = 
self.tokenizer.next()
+        let (logical_type, converted_type, precision, scale) = if let 
Some("(") =
+            self.tokenizer.next()
         {
-            let tpe = self
+            let (mut logical, mut converted) = self
                 .tokenizer
                 .next()
-                .ok_or_else(|| general_err!("Expected converted type, found 
None"))
-                .and_then(|v| v.to_uppercase().parse::<ConvertedType>())?;
+                .ok_or_else(|| {
+                    general_err!("Expected logical or converted type, found 
None")
+                })
+                .and_then(|v| {
+                    let upper = v.to_uppercase();
+                    let logical = upper.parse::<LogicalType>();
+                    match logical {
+                        Ok(logical) => Ok((
+                            Some(logical.clone()),
+                            ConvertedType::from(Some(logical)),
+                        )),
+                        Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
+                    }
+                })?;
 
             // Parse precision and scale for decimals
             let mut precision: i32 = -1;
             let mut scale: i32 = -1;
 
-            if tpe == ConvertedType::DECIMAL {
+            // Parse the concrete logical type
+            if let Some(tpe) = &logical {
+                match tpe {
+                    LogicalType::DECIMAL(_) => {
+                        if let Some("(") = self.tokenizer.next() {
+                            precision = parse_i32(
+                                self.tokenizer.next(),
+                                "Expected precision, found None",
+                                "Failed to parse precision for DECIMAL type",
+                            )?;
+                            if let Some(",") = self.tokenizer.next() {
+                                scale = parse_i32(
+                                    self.tokenizer.next(),
+                                    "Invalid boolean found",
+                                    "Failure to parse timezone info for TIME 
type",
+                                )?; // TODO: this might not cater for the case 
of no scale correctly
+                                assert_token(self.tokenizer.next(), ")")?;
+                                logical = 
Some(LogicalType::DECIMAL(DecimalType {
+                                    precision,
+                                    scale,
+                                }));
+                                converted = 
ConvertedType::from(logical.clone());
+                            } else {
+                                scale = 0;
+                                logical = 
Some(LogicalType::DECIMAL(DecimalType {
+                                    precision,
+                                    scale,
+                                }));
+                                converted = 
ConvertedType::from(logical.clone());
+                            }
+                        }
+                    }
+                    LogicalType::TIME(_) => {
+                        if let Some("(") = self.tokenizer.next() {
+                            let unit = parse_timeunit(
+                                self.tokenizer.next(),
+                                "Invalid timeunit found",
+                                "Failed to parse timeunit for TIME type",
+                            )?;
+                            if let Some(",") = self.tokenizer.next() {
+                                let is_adjusted_to_u_t_c = parse_bool(
+                                    self.tokenizer.next(),
+                                    "Invalid boolean found",
+                                    "Failure to parse timezone info for TIME 
type",
+                                )?;
+                                assert_token(self.tokenizer.next(), ")")?;
+                                logical = Some(LogicalType::TIME(TimeType {
+                                    unit,
+                                    is_adjusted_to_u_t_c,
+                                }));
+                                converted = 
ConvertedType::from(logical.clone());
+                            } else {
+                                // Invalid token for unit
+                                self.tokenizer.backtrack();
+                            }
+                        }
+                    }
+                    LogicalType::TIMESTAMP(_) => {
+                        if let Some("(") = self.tokenizer.next() {
+                            let unit = parse_timeunit(
+                                self.tokenizer.next(),
+                                "Invalid timeunit found",
+                                "Failed to parse timeunit for TIMESTAMP type",
+                            )?;
+                            if let Some(",") = self.tokenizer.next() {
+                                let is_adjusted_to_u_t_c = parse_bool(
+                                    self.tokenizer.next(),
+                                    "Invalid boolean found",
+                                    "Failure to parse timezone info for 
TIMESTAMP type",
+                                )?;
+                                assert_token(self.tokenizer.next(), ")")?;
+                                logical = 
Some(LogicalType::TIMESTAMP(TimestampType {
+                                    unit,
+                                    is_adjusted_to_u_t_c,
+                                }));
+                                converted = 
ConvertedType::from(logical.clone());
+                            } else {
+                                // Invalid token for unit
+                                self.tokenizer.backtrack();
+                            }
+                        }
+                    }
+                    LogicalType::INTEGER(_) => {
+                        if let Some("(") = self.tokenizer.next() {
+                            let bit_width = parse_i32(

Review comment:
       We might want to check that the `bit_width` is one of 8, 16, 32 and 64. 
All others should be invalid.

##########
File path: rust/parquet/src/schema/parser.rs
##########
@@ -281,19 +329,142 @@ impl<'a> Parser<'a> {
             .ok_or_else(|| general_err!("Expected name, found None"))?;
 
         // Parse converted type
-        let (converted_type, precision, scale) = if let Some("(") = 
self.tokenizer.next()
+        let (logical_type, converted_type, precision, scale) = if let 
Some("(") =
+            self.tokenizer.next()
         {
-            let tpe = self
+            let (mut logical, mut converted) = self
                 .tokenizer
                 .next()
-                .ok_or_else(|| general_err!("Expected converted type, found 
None"))
-                .and_then(|v| v.to_uppercase().parse::<ConvertedType>())?;
+                .ok_or_else(|| {
+                    general_err!("Expected logical or converted type, found 
None")
+                })
+                .and_then(|v| {
+                    let upper = v.to_uppercase();
+                    let logical = upper.parse::<LogicalType>();
+                    match logical {
+                        Ok(logical) => Ok((
+                            Some(logical.clone()),
+                            ConvertedType::from(Some(logical)),
+                        )),
+                        Err(_) => Ok((None, upper.parse::<ConvertedType>()?)),
+                    }
+                })?;
 
             // Parse precision and scale for decimals
             let mut precision: i32 = -1;
             let mut scale: i32 = -1;
 
-            if tpe == ConvertedType::DECIMAL {
+            // Parse the concrete logical type
+            if let Some(tpe) = &logical {
+                match tpe {
+                    LogicalType::DECIMAL(_) => {
+                        if let Some("(") = self.tokenizer.next() {
+                            precision = parse_i32(
+                                self.tokenizer.next(),
+                                "Expected precision, found None",
+                                "Failed to parse precision for DECIMAL type",
+                            )?;
+                            if let Some(",") = self.tokenizer.next() {
+                                scale = parse_i32(
+                                    self.tokenizer.next(),
+                                    "Invalid boolean found",
+                                    "Failure to parse timezone info for TIME 
type",
+                                )?; // TODO: this might not cater for the case 
of no scale correctly

Review comment:
       I think we always require both precision and scale to be present?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to