alamb commented on code in PR #2626:
URL: https://github.com/apache/arrow-rs/pull/2626#discussion_r967107935


##########
parquet/CONTRIBUTING.md:
##########
@@ -60,7 +60,18 @@ Run `cargo bench` for benchmarks.
 To build documentation, run `cargo doc --no-deps`.
 To compile and view in the browser, run `cargo doc --no-deps --open`.
 
-## Update Supported Parquet Version
+## Update Parquet Format
 
-To update Parquet format to a newer version, check if 
[parquet-format](https://github.com/sunchao/parquet-format-rs)
-version is available. Then simply update version of `parquet-format` crate in 
Cargo.toml.
+To generate the parquet format code run

Review Comment:
   ```suggestion
   To generate the parquet format (thrift definitions) code run
   ```



##########
.gitattributes:
##########
@@ -1,6 +1 @@
-r/R/RcppExports.R linguist-generated=true
-r/R/arrowExports.R linguist-generated=true
-r/src/RcppExports.cpp linguist-generated=true
-r/src/arrowExports.cpp linguist-generated=true
-r/man/*.Rd linguist-generated=true
-
+parquet/src/format.rs linguist-generated

Review Comment:
   👍 



##########
parquet/src/basic.rs:
##########
@@ -730,113 +736,129 @@ impl From<Option<LogicalType>> for ConvertedType {
 // ----------------------------------------------------------------------
 // parquet::FieldRepetitionType <=> Repetition conversion
 
-impl From<parquet::FieldRepetitionType> for Repetition {
-    fn from(value: parquet::FieldRepetitionType) -> Self {
-        match value {
-            parquet::FieldRepetitionType::Required => Repetition::REQUIRED,
-            parquet::FieldRepetitionType::Optional => Repetition::OPTIONAL,
-            parquet::FieldRepetitionType::Repeated => Repetition::REPEATED,
-        }
+impl TryFrom<parquet::FieldRepetitionType> for Repetition {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
+        Ok(match value {
+            parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
+            parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
+            parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
+            _ => return Err(general_err!("unexpected repetition type: {}", 
value.0)),

Review Comment:
   ```suggestion
               _ => return Err(general_err!("unexpected parquet repetition 
type: {}", value.0)),
   ```



##########
parquet/src/basic.rs:
##########
@@ -496,105 +496,111 @@ impl fmt::Display for ColumnOrder {
 // ----------------------------------------------------------------------
 // parquet::Type <=> Type conversion
 
-impl From<parquet::Type> for Type {
-    fn from(value: parquet::Type) -> Self {
-        match value {
-            parquet::Type::Boolean => Type::BOOLEAN,
-            parquet::Type::Int32 => Type::INT32,
-            parquet::Type::Int64 => Type::INT64,
-            parquet::Type::Int96 => Type::INT96,
-            parquet::Type::Float => Type::FLOAT,
-            parquet::Type::Double => Type::DOUBLE,
-            parquet::Type::ByteArray => Type::BYTE_ARRAY,
-            parquet::Type::FixedLenByteArray => Type::FIXED_LEN_BYTE_ARRAY,
-        }
+impl TryFrom<parquet::Type> for Type {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::Type) -> Result<Self> {
+        Ok(match value {
+            parquet::Type::BOOLEAN => Type::BOOLEAN,
+            parquet::Type::INT32 => Type::INT32,
+            parquet::Type::INT64 => Type::INT64,
+            parquet::Type::INT96 => Type::INT96,
+            parquet::Type::FLOAT => Type::FLOAT,
+            parquet::Type::DOUBLE => Type::DOUBLE,
+            parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
+            parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
+            _ => return Err(general_err!("unexpected type: {}", value.0)),
+        })
     }
 }
 
 impl From<Type> for parquet::Type {
     fn from(value: Type) -> Self {
         match value {
-            Type::BOOLEAN => parquet::Type::Boolean,
-            Type::INT32 => parquet::Type::Int32,
-            Type::INT64 => parquet::Type::Int64,
-            Type::INT96 => parquet::Type::Int96,
-            Type::FLOAT => parquet::Type::Float,
-            Type::DOUBLE => parquet::Type::Double,
-            Type::BYTE_ARRAY => parquet::Type::ByteArray,
-            Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FixedLenByteArray,
+            Type::BOOLEAN => parquet::Type::BOOLEAN,
+            Type::INT32 => parquet::Type::INT32,
+            Type::INT64 => parquet::Type::INT64,
+            Type::INT96 => parquet::Type::INT96,
+            Type::FLOAT => parquet::Type::FLOAT,
+            Type::DOUBLE => parquet::Type::DOUBLE,
+            Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
+            Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
         }
     }
 }
 
 // ----------------------------------------------------------------------
 // parquet::ConvertedType <=> ConvertedType conversion
 
-impl From<Option<parquet::ConvertedType>> for ConvertedType {
-    fn from(option: Option<parquet::ConvertedType>) -> Self {
-        match option {
+impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
+    type Error = ParquetError;
+
+    fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
+        Ok(match option {
             None => ConvertedType::NONE,
             Some(value) => match value {
-                parquet::ConvertedType::Utf8 => ConvertedType::UTF8,
-                parquet::ConvertedType::Map => ConvertedType::MAP,
-                parquet::ConvertedType::MapKeyValue => 
ConvertedType::MAP_KEY_VALUE,
-                parquet::ConvertedType::List => ConvertedType::LIST,
-                parquet::ConvertedType::Enum => ConvertedType::ENUM,
-                parquet::ConvertedType::Decimal => ConvertedType::DECIMAL,
-                parquet::ConvertedType::Date => ConvertedType::DATE,
-                parquet::ConvertedType::TimeMillis => 
ConvertedType::TIME_MILLIS,
-                parquet::ConvertedType::TimeMicros => 
ConvertedType::TIME_MICROS,
-                parquet::ConvertedType::TimestampMillis => {
+                parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
+                parquet::ConvertedType::MAP => ConvertedType::MAP,
+                parquet::ConvertedType::MAP_KEY_VALUE => 
ConvertedType::MAP_KEY_VALUE,
+                parquet::ConvertedType::LIST => ConvertedType::LIST,
+                parquet::ConvertedType::ENUM => ConvertedType::ENUM,
+                parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
+                parquet::ConvertedType::DATE => ConvertedType::DATE,
+                parquet::ConvertedType::TIME_MILLIS => 
ConvertedType::TIME_MILLIS,
+                parquet::ConvertedType::TIME_MICROS => 
ConvertedType::TIME_MICROS,
+                parquet::ConvertedType::TIMESTAMP_MILLIS => {
                     ConvertedType::TIMESTAMP_MILLIS
                 }
-                parquet::ConvertedType::TimestampMicros => {
+                parquet::ConvertedType::TIMESTAMP_MICROS => {
                     ConvertedType::TIMESTAMP_MICROS
                 }
-                parquet::ConvertedType::Uint8 => ConvertedType::UINT_8,
-                parquet::ConvertedType::Uint16 => ConvertedType::UINT_16,
-                parquet::ConvertedType::Uint32 => ConvertedType::UINT_32,
-                parquet::ConvertedType::Uint64 => ConvertedType::UINT_64,
-                parquet::ConvertedType::Int8 => ConvertedType::INT_8,
-                parquet::ConvertedType::Int16 => ConvertedType::INT_16,
-                parquet::ConvertedType::Int32 => ConvertedType::INT_32,
-                parquet::ConvertedType::Int64 => ConvertedType::INT_64,
-                parquet::ConvertedType::Json => ConvertedType::JSON,
-                parquet::ConvertedType::Bson => ConvertedType::BSON,
-                parquet::ConvertedType::Interval => ConvertedType::INTERVAL,
+                parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8,
+                parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16,
+                parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32,
+                parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64,
+                parquet::ConvertedType::INT_8 => ConvertedType::INT_8,
+                parquet::ConvertedType::INT_16 => ConvertedType::INT_16,
+                parquet::ConvertedType::INT_32 => ConvertedType::INT_32,
+                parquet::ConvertedType::INT_64 => ConvertedType::INT_64,
+                parquet::ConvertedType::JSON => ConvertedType::JSON,
+                parquet::ConvertedType::BSON => ConvertedType::BSON,
+                parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL,
+                _ => return Err(general_err!("unexpected converted type: {}", 
value.0)),
             },
-        }
+        })
     }
 }
 
 impl From<ConvertedType> for Option<parquet::ConvertedType> {
     fn from(value: ConvertedType) -> Self {
         match value {
             ConvertedType::NONE => None,
-            ConvertedType::UTF8 => Some(parquet::ConvertedType::Utf8),
-            ConvertedType::MAP => Some(parquet::ConvertedType::Map),
-            ConvertedType::MAP_KEY_VALUE => 
Some(parquet::ConvertedType::MapKeyValue),
-            ConvertedType::LIST => Some(parquet::ConvertedType::List),
-            ConvertedType::ENUM => Some(parquet::ConvertedType::Enum),
-            ConvertedType::DECIMAL => Some(parquet::ConvertedType::Decimal),
-            ConvertedType::DATE => Some(parquet::ConvertedType::Date),
-            ConvertedType::TIME_MILLIS => 
Some(parquet::ConvertedType::TimeMillis),
-            ConvertedType::TIME_MICROS => 
Some(parquet::ConvertedType::TimeMicros),
+            ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8),

Review Comment:
   It is somewhat annoying that the thrift compiler changed these enum variants 
to be all caps 😱 



##########
parquet/src/basic.rs:
##########
@@ -730,113 +736,129 @@ impl From<Option<LogicalType>> for ConvertedType {
 // ----------------------------------------------------------------------
 // parquet::FieldRepetitionType <=> Repetition conversion
 
-impl From<parquet::FieldRepetitionType> for Repetition {
-    fn from(value: parquet::FieldRepetitionType) -> Self {
-        match value {
-            parquet::FieldRepetitionType::Required => Repetition::REQUIRED,
-            parquet::FieldRepetitionType::Optional => Repetition::OPTIONAL,
-            parquet::FieldRepetitionType::Repeated => Repetition::REPEATED,
-        }
+impl TryFrom<parquet::FieldRepetitionType> for Repetition {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
+        Ok(match value {
+            parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
+            parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
+            parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
+            _ => return Err(general_err!("unexpected repetition type: {}", 
value.0)),
+        })
     }
 }
 
 impl From<Repetition> for parquet::FieldRepetitionType {
     fn from(value: Repetition) -> Self {
         match value {
-            Repetition::REQUIRED => parquet::FieldRepetitionType::Required,
-            Repetition::OPTIONAL => parquet::FieldRepetitionType::Optional,
-            Repetition::REPEATED => parquet::FieldRepetitionType::Repeated,
+            Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
+            Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
+            Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
         }
     }
 }
 
 // ----------------------------------------------------------------------
 // parquet::Encoding <=> Encoding conversion
 
-impl From<parquet::Encoding> for Encoding {
-    fn from(value: parquet::Encoding) -> Self {
-        match value {
-            parquet::Encoding::Plain => Encoding::PLAIN,
-            parquet::Encoding::PlainDictionary => Encoding::PLAIN_DICTIONARY,
-            parquet::Encoding::Rle => Encoding::RLE,
-            parquet::Encoding::BitPacked => Encoding::BIT_PACKED,
-            parquet::Encoding::DeltaBinaryPacked => 
Encoding::DELTA_BINARY_PACKED,
-            parquet::Encoding::DeltaLengthByteArray => 
Encoding::DELTA_LENGTH_BYTE_ARRAY,
-            parquet::Encoding::DeltaByteArray => Encoding::DELTA_BYTE_ARRAY,
-            parquet::Encoding::RleDictionary => Encoding::RLE_DICTIONARY,
-            parquet::Encoding::ByteStreamSplit => Encoding::BYTE_STREAM_SPLIT,
-        }
+impl TryFrom<parquet::Encoding> for Encoding {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::Encoding) -> Result<Self> {
+        Ok(match value {
+            parquet::Encoding::PLAIN => Encoding::PLAIN,
+            parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
+            parquet::Encoding::RLE => Encoding::RLE,
+            parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
+            parquet::Encoding::DELTA_BINARY_PACKED => 
Encoding::DELTA_BINARY_PACKED,
+            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => {
+                Encoding::DELTA_LENGTH_BYTE_ARRAY
+            }
+            parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
+            parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
+            parquet::Encoding::BYTE_STREAM_SPLIT => 
Encoding::BYTE_STREAM_SPLIT,
+            _ => return Err(general_err!("unexpected encoding: {}", value.0)),

Review Comment:
   I was curious about the other values (as in what variants are `_` matching) 
so I commented it out:
   
   ```
      Compiling parquet v22.0.0 (/Users/alamb/Software/arrow-rs/parquet)
   error[E0004]: non-exhaustive patterns: `Encoding(i32::MIN..=-1_i32)`, 
`Encoding(1_i32)` and `Encoding(10_i32..=i32::MAX)` not covered
      --> parquet/src/basic.rs:769:18
       |
   769 |         Ok(match value {
       |                  ^^^^^ patterns `Encoding(i32::MIN..=-1_i32)`, 
`Encoding(1_i32)` and `Encoding(10_i32..=i32::MAX)` not covered
       |
   
   ```
   
   In case any other reviewer was interestd



##########
parquet/src/basic.rs:
##########
@@ -496,105 +496,111 @@ impl fmt::Display for ColumnOrder {
 // ----------------------------------------------------------------------
 // parquet::Type <=> Type conversion
 
-impl From<parquet::Type> for Type {
-    fn from(value: parquet::Type) -> Self {
-        match value {
-            parquet::Type::Boolean => Type::BOOLEAN,
-            parquet::Type::Int32 => Type::INT32,
-            parquet::Type::Int64 => Type::INT64,
-            parquet::Type::Int96 => Type::INT96,
-            parquet::Type::Float => Type::FLOAT,
-            parquet::Type::Double => Type::DOUBLE,
-            parquet::Type::ByteArray => Type::BYTE_ARRAY,
-            parquet::Type::FixedLenByteArray => Type::FIXED_LEN_BYTE_ARRAY,
-        }
+impl TryFrom<parquet::Type> for Type {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::Type) -> Result<Self> {
+        Ok(match value {
+            parquet::Type::BOOLEAN => Type::BOOLEAN,
+            parquet::Type::INT32 => Type::INT32,
+            parquet::Type::INT64 => Type::INT64,
+            parquet::Type::INT96 => Type::INT96,
+            parquet::Type::FLOAT => Type::FLOAT,
+            parquet::Type::DOUBLE => Type::DOUBLE,
+            parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
+            parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
+            _ => return Err(general_err!("unexpected type: {}", value.0)),

Review Comment:
   ```suggestion
               _ => return Err(general_err!("unexpected parquet type: {}", 
value.0)),
   ```



##########
parquet/src/lib.rs:
##########
@@ -61,6 +61,9 @@ macro_rules! experimental {
 pub mod errors;
 pub mod basic;
 
+#[allow(clippy::derivable_impls, clippy::match_single_binding)]
+pub mod format;

Review Comment:
   ```suggestion
   /// Automatically generated code for reading parquet thrift definition. 
   // see parquet/CONTRIBUTING.md for instructions on regenerating 
   pub mod format;
   ```



##########
parquet/src/basic.rs:
##########
@@ -730,113 +736,129 @@ impl From<Option<LogicalType>> for ConvertedType {
 // ----------------------------------------------------------------------
 // parquet::FieldRepetitionType <=> Repetition conversion
 
-impl From<parquet::FieldRepetitionType> for Repetition {
-    fn from(value: parquet::FieldRepetitionType) -> Self {
-        match value {
-            parquet::FieldRepetitionType::Required => Repetition::REQUIRED,
-            parquet::FieldRepetitionType::Optional => Repetition::OPTIONAL,
-            parquet::FieldRepetitionType::Repeated => Repetition::REPEATED,
-        }
+impl TryFrom<parquet::FieldRepetitionType> for Repetition {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
+        Ok(match value {
+            parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
+            parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
+            parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
+            _ => return Err(general_err!("unexpected repetition type: {}", 
value.0)),
+        })
     }
 }
 
 impl From<Repetition> for parquet::FieldRepetitionType {
     fn from(value: Repetition) -> Self {
         match value {
-            Repetition::REQUIRED => parquet::FieldRepetitionType::Required,
-            Repetition::OPTIONAL => parquet::FieldRepetitionType::Optional,
-            Repetition::REPEATED => parquet::FieldRepetitionType::Repeated,
+            Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
+            Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
+            Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
         }
     }
 }
 
 // ----------------------------------------------------------------------
 // parquet::Encoding <=> Encoding conversion
 
-impl From<parquet::Encoding> for Encoding {
-    fn from(value: parquet::Encoding) -> Self {
-        match value {
-            parquet::Encoding::Plain => Encoding::PLAIN,
-            parquet::Encoding::PlainDictionary => Encoding::PLAIN_DICTIONARY,
-            parquet::Encoding::Rle => Encoding::RLE,
-            parquet::Encoding::BitPacked => Encoding::BIT_PACKED,
-            parquet::Encoding::DeltaBinaryPacked => 
Encoding::DELTA_BINARY_PACKED,
-            parquet::Encoding::DeltaLengthByteArray => 
Encoding::DELTA_LENGTH_BYTE_ARRAY,
-            parquet::Encoding::DeltaByteArray => Encoding::DELTA_BYTE_ARRAY,
-            parquet::Encoding::RleDictionary => Encoding::RLE_DICTIONARY,
-            parquet::Encoding::ByteStreamSplit => Encoding::BYTE_STREAM_SPLIT,
-        }
+impl TryFrom<parquet::Encoding> for Encoding {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::Encoding) -> Result<Self> {
+        Ok(match value {
+            parquet::Encoding::PLAIN => Encoding::PLAIN,
+            parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
+            parquet::Encoding::RLE => Encoding::RLE,
+            parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
+            parquet::Encoding::DELTA_BINARY_PACKED => 
Encoding::DELTA_BINARY_PACKED,
+            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => {
+                Encoding::DELTA_LENGTH_BYTE_ARRAY
+            }
+            parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
+            parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
+            parquet::Encoding::BYTE_STREAM_SPLIT => 
Encoding::BYTE_STREAM_SPLIT,
+            _ => return Err(general_err!("unexpected encoding: {}", value.0)),
+        })
     }
 }
 
 impl From<Encoding> for parquet::Encoding {
     fn from(value: Encoding) -> Self {
         match value {
-            Encoding::PLAIN => parquet::Encoding::Plain,
-            Encoding::PLAIN_DICTIONARY => parquet::Encoding::PlainDictionary,
-            Encoding::RLE => parquet::Encoding::Rle,
-            Encoding::BIT_PACKED => parquet::Encoding::BitPacked,
-            Encoding::DELTA_BINARY_PACKED => 
parquet::Encoding::DeltaBinaryPacked,
-            Encoding::DELTA_LENGTH_BYTE_ARRAY => 
parquet::Encoding::DeltaLengthByteArray,
-            Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DeltaByteArray,
-            Encoding::RLE_DICTIONARY => parquet::Encoding::RleDictionary,
-            Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::ByteStreamSplit,
+            Encoding::PLAIN => parquet::Encoding::PLAIN,
+            Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY,
+            Encoding::RLE => parquet::Encoding::RLE,
+            Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED,
+            Encoding::DELTA_BINARY_PACKED => 
parquet::Encoding::DELTA_BINARY_PACKED,
+            Encoding::DELTA_LENGTH_BYTE_ARRAY => {
+                parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY
+            }
+            Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY,
+            Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY,
+            Encoding::BYTE_STREAM_SPLIT => 
parquet::Encoding::BYTE_STREAM_SPLIT,
         }
     }
 }
 
 // ----------------------------------------------------------------------
 // parquet::CompressionCodec <=> Compression conversion
 
-impl From<parquet::CompressionCodec> for Compression {
-    fn from(value: parquet::CompressionCodec) -> Self {
-        match value {
-            parquet::CompressionCodec::Uncompressed => 
Compression::UNCOMPRESSED,
-            parquet::CompressionCodec::Snappy => Compression::SNAPPY,
-            parquet::CompressionCodec::Gzip => Compression::GZIP,
-            parquet::CompressionCodec::Lzo => Compression::LZO,
-            parquet::CompressionCodec::Brotli => Compression::BROTLI,
-            parquet::CompressionCodec::Lz4 => Compression::LZ4,
-            parquet::CompressionCodec::Zstd => Compression::ZSTD,
-        }
+impl TryFrom<parquet::CompressionCodec> for Compression {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::CompressionCodec) -> Result<Self> {
+        Ok(match value {
+            parquet::CompressionCodec::UNCOMPRESSED => 
Compression::UNCOMPRESSED,
+            parquet::CompressionCodec::SNAPPY => Compression::SNAPPY,
+            parquet::CompressionCodec::GZIP => Compression::GZIP,
+            parquet::CompressionCodec::LZO => Compression::LZO,
+            parquet::CompressionCodec::BROTLI => Compression::BROTLI,
+            parquet::CompressionCodec::LZ4 => Compression::LZ4,
+            parquet::CompressionCodec::ZSTD => Compression::ZSTD,
+            _ => return Err(general_err!("unexpected compression codec: {}", 
value.0)),
+        })
     }
 }
 
 impl From<Compression> for parquet::CompressionCodec {
     fn from(value: Compression) -> Self {
         match value {
-            Compression::UNCOMPRESSED => 
parquet::CompressionCodec::Uncompressed,
-            Compression::SNAPPY => parquet::CompressionCodec::Snappy,
-            Compression::GZIP => parquet::CompressionCodec::Gzip,
-            Compression::LZO => parquet::CompressionCodec::Lzo,
-            Compression::BROTLI => parquet::CompressionCodec::Brotli,
-            Compression::LZ4 => parquet::CompressionCodec::Lz4,
-            Compression::ZSTD => parquet::CompressionCodec::Zstd,
+            Compression::UNCOMPRESSED => 
parquet::CompressionCodec::UNCOMPRESSED,
+            Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
+            Compression::GZIP => parquet::CompressionCodec::GZIP,
+            Compression::LZO => parquet::CompressionCodec::LZO,
+            Compression::BROTLI => parquet::CompressionCodec::BROTLI,
+            Compression::LZ4 => parquet::CompressionCodec::LZ4,
+            Compression::ZSTD => parquet::CompressionCodec::ZSTD,
         }
     }
 }
 
 // ----------------------------------------------------------------------
 // parquet::PageType <=> PageType conversion
 
-impl From<parquet::PageType> for PageType {
-    fn from(value: parquet::PageType) -> Self {
-        match value {
-            parquet::PageType::DataPage => PageType::DATA_PAGE,
-            parquet::PageType::IndexPage => PageType::INDEX_PAGE,
-            parquet::PageType::DictionaryPage => PageType::DICTIONARY_PAGE,
-            parquet::PageType::DataPageV2 => PageType::DATA_PAGE_V2,
-        }
+impl TryFrom<parquet::PageType> for PageType {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::PageType) -> Result<Self> {
+        Ok(match value {
+            parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
+            parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
+            parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
+            parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
+            _ => return Err(general_err!("unexpected page type: {}", value.0)),

Review Comment:
   ```suggestion
               _ => return Err(general_err!("unexpected parquet page type: {}", 
value.0)),
   ```



##########
parquet/Cargo.toml:
##########
@@ -31,9 +31,8 @@ rust-version = "1.62"
 
 [dependencies]
 ahash = "0.8"
-parquet-format = { version = "4.0.0", default-features = false }
 bytes = { version = "1.1", default-features = false, features = ["std"] }
-thrift = { version = "0.13", default-features = false }
+thrift = { version = "0.16", default-features = false }

Review Comment:
   👍 



##########
parquet/src/basic.rs:
##########
@@ -496,105 +496,111 @@ impl fmt::Display for ColumnOrder {
 // ----------------------------------------------------------------------
 // parquet::Type <=> Type conversion
 
-impl From<parquet::Type> for Type {
-    fn from(value: parquet::Type) -> Self {
-        match value {
-            parquet::Type::Boolean => Type::BOOLEAN,
-            parquet::Type::Int32 => Type::INT32,
-            parquet::Type::Int64 => Type::INT64,
-            parquet::Type::Int96 => Type::INT96,
-            parquet::Type::Float => Type::FLOAT,
-            parquet::Type::Double => Type::DOUBLE,
-            parquet::Type::ByteArray => Type::BYTE_ARRAY,
-            parquet::Type::FixedLenByteArray => Type::FIXED_LEN_BYTE_ARRAY,
-        }
+impl TryFrom<parquet::Type> for Type {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::Type) -> Result<Self> {
+        Ok(match value {
+            parquet::Type::BOOLEAN => Type::BOOLEAN,
+            parquet::Type::INT32 => Type::INT32,
+            parquet::Type::INT64 => Type::INT64,
+            parquet::Type::INT96 => Type::INT96,
+            parquet::Type::FLOAT => Type::FLOAT,
+            parquet::Type::DOUBLE => Type::DOUBLE,
+            parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
+            parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
+            _ => return Err(general_err!("unexpected type: {}", value.0)),
+        })
     }
 }
 
 impl From<Type> for parquet::Type {
     fn from(value: Type) -> Self {
         match value {
-            Type::BOOLEAN => parquet::Type::Boolean,
-            Type::INT32 => parquet::Type::Int32,
-            Type::INT64 => parquet::Type::Int64,
-            Type::INT96 => parquet::Type::Int96,
-            Type::FLOAT => parquet::Type::Float,
-            Type::DOUBLE => parquet::Type::Double,
-            Type::BYTE_ARRAY => parquet::Type::ByteArray,
-            Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FixedLenByteArray,
+            Type::BOOLEAN => parquet::Type::BOOLEAN,
+            Type::INT32 => parquet::Type::INT32,
+            Type::INT64 => parquet::Type::INT64,
+            Type::INT96 => parquet::Type::INT96,
+            Type::FLOAT => parquet::Type::FLOAT,
+            Type::DOUBLE => parquet::Type::DOUBLE,
+            Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
+            Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
         }
     }
 }
 
 // ----------------------------------------------------------------------
 // parquet::ConvertedType <=> ConvertedType conversion
 
-impl From<Option<parquet::ConvertedType>> for ConvertedType {
-    fn from(option: Option<parquet::ConvertedType>) -> Self {
-        match option {
+impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
+    type Error = ParquetError;
+
+    fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
+        Ok(match option {
             None => ConvertedType::NONE,
             Some(value) => match value {
-                parquet::ConvertedType::Utf8 => ConvertedType::UTF8,
-                parquet::ConvertedType::Map => ConvertedType::MAP,
-                parquet::ConvertedType::MapKeyValue => 
ConvertedType::MAP_KEY_VALUE,
-                parquet::ConvertedType::List => ConvertedType::LIST,
-                parquet::ConvertedType::Enum => ConvertedType::ENUM,
-                parquet::ConvertedType::Decimal => ConvertedType::DECIMAL,
-                parquet::ConvertedType::Date => ConvertedType::DATE,
-                parquet::ConvertedType::TimeMillis => 
ConvertedType::TIME_MILLIS,
-                parquet::ConvertedType::TimeMicros => 
ConvertedType::TIME_MICROS,
-                parquet::ConvertedType::TimestampMillis => {
+                parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
+                parquet::ConvertedType::MAP => ConvertedType::MAP,
+                parquet::ConvertedType::MAP_KEY_VALUE => 
ConvertedType::MAP_KEY_VALUE,
+                parquet::ConvertedType::LIST => ConvertedType::LIST,
+                parquet::ConvertedType::ENUM => ConvertedType::ENUM,
+                parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
+                parquet::ConvertedType::DATE => ConvertedType::DATE,
+                parquet::ConvertedType::TIME_MILLIS => 
ConvertedType::TIME_MILLIS,
+                parquet::ConvertedType::TIME_MICROS => 
ConvertedType::TIME_MICROS,
+                parquet::ConvertedType::TIMESTAMP_MILLIS => {
                     ConvertedType::TIMESTAMP_MILLIS
                 }
-                parquet::ConvertedType::TimestampMicros => {
+                parquet::ConvertedType::TIMESTAMP_MICROS => {
                     ConvertedType::TIMESTAMP_MICROS
                 }
-                parquet::ConvertedType::Uint8 => ConvertedType::UINT_8,
-                parquet::ConvertedType::Uint16 => ConvertedType::UINT_16,
-                parquet::ConvertedType::Uint32 => ConvertedType::UINT_32,
-                parquet::ConvertedType::Uint64 => ConvertedType::UINT_64,
-                parquet::ConvertedType::Int8 => ConvertedType::INT_8,
-                parquet::ConvertedType::Int16 => ConvertedType::INT_16,
-                parquet::ConvertedType::Int32 => ConvertedType::INT_32,
-                parquet::ConvertedType::Int64 => ConvertedType::INT_64,
-                parquet::ConvertedType::Json => ConvertedType::JSON,
-                parquet::ConvertedType::Bson => ConvertedType::BSON,
-                parquet::ConvertedType::Interval => ConvertedType::INTERVAL,
+                parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8,
+                parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16,
+                parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32,
+                parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64,
+                parquet::ConvertedType::INT_8 => ConvertedType::INT_8,
+                parquet::ConvertedType::INT_16 => ConvertedType::INT_16,
+                parquet::ConvertedType::INT_32 => ConvertedType::INT_32,
+                parquet::ConvertedType::INT_64 => ConvertedType::INT_64,
+                parquet::ConvertedType::JSON => ConvertedType::JSON,
+                parquet::ConvertedType::BSON => ConvertedType::BSON,
+                parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL,
+                _ => return Err(general_err!("unexpected converted type: {}", 
value.0)),

Review Comment:
   ```suggestion
                   _ => return Err(general_err!("unexpected parquet converted 
type: {}", value.0)),
   ```



##########
parquet/CONTRIBUTING.md:
##########
@@ -60,7 +60,18 @@ Run `cargo bench` for benchmarks.
 To build documentation, run `cargo doc --no-deps`.
 To compile and view in the browser, run `cargo doc --no-deps --open`.
 
-## Update Supported Parquet Version
+## Update Parquet Format
 
-To update Parquet format to a newer version, check if 
[parquet-format](https://github.com/sunchao/parquet-format-rs)
-version is available. Then simply update version of `parquet-format` crate in 
Cargo.toml.
+To generate the parquet format code run
+
+```
+$ git clone https://github.com/apache/thrift
+$ cd thrift
+$ git checkout v0.16.0
+# docker build just builds a docker image with thrift dependencies
+$ docker build -t thrift build/docker/ubuntu-bionic
+# build/docker/scripts/cmake.sh actually compiles thrift
+$ docker run -v $(pwd):/thrift/src -it thrift build/docker/scripts/cmake.sh && 
wget 
https://raw.githubusercontent.com/apache/parquet-format/apache-parquet-format-2.9.0/src/main/thrift/parquet.thrift
 && ./cmake_build/compiler/cpp/bin/thrift --gen rs parquet.thrift

Review Comment:
   This command did not complete successfully for me. 
   
   ```shell
   ...
   441: ----------------------------------------------------------------------
   441: Ran 2 tests in 0.012s
   441: 
   441: OK
   441: Traceback (most recent call last):
   441:   File "/thrift/src/test/py/TestServer.py", line 403, in <module>
   441:     from thrift.TMultiplexedProcessor import TMultiplexedProcessor
   441:   File 
"/thrift/src/lib/py/build/lib.linux-x86_64-2.7/thrift/TMultiplexedProcessor.py",
 line 20, in <module>
   441:     from thrift.Thrift import TProcessor, TMessageType
   441: ImportError: No module named Thrift
   441: t.py
   441: ----
   441: ----------------
   441:  Executing Client/Server tests with various generated code directories
   441:  Servers to be tested: TSimpleServer, TThreadedServer, 
TThreadPoolServer, TNonblockingServer, THttpServer, TProcessPoolServer, 
TForkingServer
   441:  Directories to be tested: gen-py-default, gen-py-slots, 
gen-py-oldstyle, gen-py-no_utf8strings, gen-py-dynamic, gen-py-dynamicslots
   441:  Protocols to be tested: accel, accelc, binary, compact, json, header
   441:  Options to be tested: ZLIB(yes/no), SSL(yes/no)
   441: ----------------
   441: 
   441: Test run #0:  (includes gen-py-default) Server=TSimpleServer,  
Proto=accel,  zlib=False,  SSL=False
   441: Testing server TSimpleServer: /usr/bin/python 
/thrift/src/test/py/TestServer.py --protocol=accel --port=9090 TSimpleServer
   441: FAIL: Server process (/usr/bin/python /thrift/src/test/py/TestServer.py 
--protocol=accel --port=9090 TSimpleServer) failed with retcode 1
   441: Traceback (most recent call last):
   441:   File "/thrift/src/test/py/RunClientServer.py", line 323, in <module>
   441:     sys.exit(main())
   441:   File "/thrift/src/test/py/RunClientServer.py", line 315, in main
   441:     tests.test_feature('gendir', generated_dirs)
   441:   File "/thrift/src/test/py/RunClientServer.py", line 230, in 
test_feature
   441:     if self.run(conf, test_count):
   441:   File "/thrift/src/test/py/RunClientServer.py", line 219, in run
   441:     runServiceTest(self.libdir, self.genbase, genpydir, try_server, 
try_proto, self.port, with_zlib, with_ssl, self.verbose)
   441:   File "/thrift/src/test/py/RunClientServer.py", line 157, in 
runServiceTest
   441:     ensureServerAlive()
   441:   File "/thrift/src/test/py/RunClientServer.py", line 140, in 
ensureServerAlive
   441:     % (server_class, ' '.join(server_args)))
   441: Exception: Server subprocess TSimpleServer died, args: /usr/bin/python 
/thrift/src/test/py/TestServer.py --protocol=accel --port=9090 TSimpleServer
   441/441 Test #441: python_test .......................***Failed   28.49 sec
   
   99% tests passed, 6 tests failed out of 441
   
   Total Test time (real) = 250.09 sec
   
   The following tests FAILED:
        382 - TInterruptTest (Failed)
        401 - TNonblockingSSLServerTest (Failed)
        403 - SecurityTest (Failed)
        404 - SecurityFromBufferTest (Failed)
        433 - PythonTestSSLSocket (Failed)
        441 - python_test (Failed)
   Errors while running CTest
   ```



##########
parquet/src/basic.rs:
##########
@@ -730,113 +736,129 @@ impl From<Option<LogicalType>> for ConvertedType {
 // ----------------------------------------------------------------------
 // parquet::FieldRepetitionType <=> Repetition conversion
 
-impl From<parquet::FieldRepetitionType> for Repetition {
-    fn from(value: parquet::FieldRepetitionType) -> Self {
-        match value {
-            parquet::FieldRepetitionType::Required => Repetition::REQUIRED,
-            parquet::FieldRepetitionType::Optional => Repetition::OPTIONAL,
-            parquet::FieldRepetitionType::Repeated => Repetition::REPEATED,
-        }
+impl TryFrom<parquet::FieldRepetitionType> for Repetition {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
+        Ok(match value {
+            parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
+            parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
+            parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
+            _ => return Err(general_err!("unexpected repetition type: {}", 
value.0)),
+        })
     }
 }
 
 impl From<Repetition> for parquet::FieldRepetitionType {
     fn from(value: Repetition) -> Self {
         match value {
-            Repetition::REQUIRED => parquet::FieldRepetitionType::Required,
-            Repetition::OPTIONAL => parquet::FieldRepetitionType::Optional,
-            Repetition::REPEATED => parquet::FieldRepetitionType::Repeated,
+            Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED,
+            Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL,
+            Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED,
         }
     }
 }
 
 // ----------------------------------------------------------------------
 // parquet::Encoding <=> Encoding conversion
 
-impl From<parquet::Encoding> for Encoding {
-    fn from(value: parquet::Encoding) -> Self {
-        match value {
-            parquet::Encoding::Plain => Encoding::PLAIN,
-            parquet::Encoding::PlainDictionary => Encoding::PLAIN_DICTIONARY,
-            parquet::Encoding::Rle => Encoding::RLE,
-            parquet::Encoding::BitPacked => Encoding::BIT_PACKED,
-            parquet::Encoding::DeltaBinaryPacked => 
Encoding::DELTA_BINARY_PACKED,
-            parquet::Encoding::DeltaLengthByteArray => 
Encoding::DELTA_LENGTH_BYTE_ARRAY,
-            parquet::Encoding::DeltaByteArray => Encoding::DELTA_BYTE_ARRAY,
-            parquet::Encoding::RleDictionary => Encoding::RLE_DICTIONARY,
-            parquet::Encoding::ByteStreamSplit => Encoding::BYTE_STREAM_SPLIT,
-        }
+impl TryFrom<parquet::Encoding> for Encoding {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::Encoding) -> Result<Self> {
+        Ok(match value {
+            parquet::Encoding::PLAIN => Encoding::PLAIN,
+            parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY,
+            parquet::Encoding::RLE => Encoding::RLE,
+            parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED,
+            parquet::Encoding::DELTA_BINARY_PACKED => 
Encoding::DELTA_BINARY_PACKED,
+            parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => {
+                Encoding::DELTA_LENGTH_BYTE_ARRAY
+            }
+            parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY,
+            parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY,
+            parquet::Encoding::BYTE_STREAM_SPLIT => 
Encoding::BYTE_STREAM_SPLIT,
+            _ => return Err(general_err!("unexpected encoding: {}", value.0)),

Review Comment:
   ```suggestion
               _ => return Err(general_err!("unexpected parquet encoding: {}", 
value.0)),
   ```



##########
parquet/CONTRIBUTING.md:
##########
@@ -60,7 +60,18 @@ Run `cargo bench` for benchmarks.
 To build documentation, run `cargo doc --no-deps`.
 To compile and view in the browser, run `cargo doc --no-deps --open`.
 
-## Update Supported Parquet Version
+## Update Parquet Format
 
-To update Parquet format to a newer version, check if 
[parquet-format](https://github.com/sunchao/parquet-format-rs)
-version is available. Then simply update version of `parquet-format` crate in 
Cargo.toml.
+To generate the parquet format code run
+
+```
+$ git clone https://github.com/apache/thrift
+$ cd thrift
+$ git checkout v0.16.0
+# docker build just builds a docker image with thrift dependencies
+$ docker build -t thrift build/docker/ubuntu-bionic

Review Comment:
   Building this image took quite some time (what a lot of build dependencies!)



##########
parquet/src/basic.rs:
##########
@@ -730,113 +736,129 @@ impl From<Option<LogicalType>> for ConvertedType {
 // ----------------------------------------------------------------------
 // parquet::FieldRepetitionType <=> Repetition conversion
 
-impl From<parquet::FieldRepetitionType> for Repetition {
-    fn from(value: parquet::FieldRepetitionType) -> Self {
-        match value {
-            parquet::FieldRepetitionType::Required => Repetition::REQUIRED,
-            parquet::FieldRepetitionType::Optional => Repetition::OPTIONAL,
-            parquet::FieldRepetitionType::Repeated => Repetition::REPEATED,
-        }
+impl TryFrom<parquet::FieldRepetitionType> for Repetition {
+    type Error = ParquetError;
+
+    fn try_from(value: parquet::FieldRepetitionType) -> Result<Self> {
+        Ok(match value {
+            parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED,
+            parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL,
+            parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED,
+            _ => return Err(general_err!("unexpected repetition type: {}", 
value.0)),

Review Comment:
   I am suggesting adding `parquet` to all the errors thinking of the poor end 
user who will see this without the potential stack to know what the context is



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to