This is an automated email from the ASF dual-hosted git repository.
etseidl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new f3baa809ef [thrift-remodel] Add macro to reduce boilerplate necessary
to implement Thrift serialization (#8634)
f3baa809ef is described below
commit f3baa809ef1a21fcfa23268ec7c92896353d94f2
Author: Ed Seidl <[email protected]>
AuthorDate: Thu Oct 16 08:49:45 2025 -0700
[thrift-remodel] Add macro to reduce boilerplate necessary to implement
Thrift serialization (#8634)
# Which issue does this PR close?
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax.
- Part of #5853.
# Rationale for this change
Structures that are to be Thrift serialized as fields in another struct
must implement the `WriteThriftField` trait. In most instances the
implementation of that trait is trivial and adds unnecessary verbosity
to the code.
# What changes are included in this PR?
Add a new `write_thrift_field` macro that generates this boilerplate
code.
# Are these changes tested?
Covered by existing tests
# Are there any user-facing changes?
No
---
parquet/THRIFT.md | 26 ++++++++
parquet/src/basic.rs | 54 ++---------------
parquet/src/file/metadata/thrift/mod.rs | 31 +++-------
parquet/src/parquet_macros.rs | 15 +++++
parquet/src/parquet_thrift.rs | 103 ++++----------------------------
5 files changed, 65 insertions(+), 164 deletions(-)
diff --git a/parquet/THRIFT.md b/parquet/THRIFT.md
index 06e97709cc..5636566507 100644
--- a/parquet/THRIFT.md
+++ b/parquet/THRIFT.md
@@ -406,6 +406,32 @@ optional fields it is. A typical `write_thrift`
implementation will look like:
}
```
+In most instances, the `WriteThriftField` implementation can be handled by the
`write_thrift_field`
+macro. The first argument is the unqualified name of an object that implements
`WriteThrift`, and
+the second is the field type (which will be `FieldType::Struct` for Thrift
structs and unions,
+and `FieldType::I32` for Thrift enums).
+
+```rust
+write_thrift_field!(MyNewStruct, FieldType::Struct);
+```
+
+which expands to:
+
+```rust
+impl WriteThriftField for MyNewStruct {
+ fn write_thrift_field<W: Write>(
+ &self,
+ writer: &mut ThriftCompactOutputProtocol<W>,
+ field_id: i16,
+ last_field_id: i16,
+ ) -> Result<i16> {
+ writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
+ self.write_thrift(writer)?;
+ Ok(field_id)
+ }
+}
+```
+
### Handling for lists
Lists of serialized objects can usually be read using
`parquet_thrift::read_thrift_vec` and written
diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs
index 0220b1b55d..bd42f043ee 100644
--- a/parquet/src/basic.rs
+++ b/parquet/src/basic.rs
@@ -30,7 +30,7 @@ use crate::parquet_thrift::{
ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol,
ThriftCompactOutputProtocol,
WriteThrift, WriteThriftField,
};
-use crate::{thrift_enum, thrift_struct, thrift_union_all_empty};
+use crate::{thrift_enum, thrift_struct, thrift_union_all_empty,
write_thrift_field};
use crate::errors::{ParquetError, Result};
@@ -210,18 +210,7 @@ impl WriteThrift for ConvertedType {
}
}
-impl WriteThriftField for ConvertedType {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
- self.write_thrift(writer)?;
- Ok(field_id)
- }
-}
+write_thrift_field!(ConvertedType, FieldType::I32);
// ----------------------------------------------------------------------
// Mirrors thrift union `TimeUnit`
@@ -584,18 +573,7 @@ impl WriteThrift for LogicalType {
}
}
-impl WriteThriftField for LogicalType {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
- self.write_thrift(writer)?;
- Ok(field_id)
- }
-}
+write_thrift_field!(LogicalType, FieldType::Struct);
// ----------------------------------------------------------------------
// Mirrors thrift enum `FieldRepetitionType`
@@ -928,18 +906,7 @@ impl WriteThrift for Compression {
}
}
-impl WriteThriftField for Compression {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
- self.write_thrift(writer)?;
- Ok(field_id)
- }
-}
+write_thrift_field!(Compression, FieldType::I32);
impl Compression {
/// Returns the codec type of this compression setting as a string,
without the compression
@@ -1116,18 +1083,7 @@ impl WriteThrift for EdgeInterpolationAlgorithm {
}
}
-impl WriteThriftField for EdgeInterpolationAlgorithm {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
- self.write_thrift(writer)?;
- Ok(field_id)
- }
-}
+write_thrift_field!(EdgeInterpolationAlgorithm, FieldType::I32);
impl Default for EdgeInterpolationAlgorithm {
fn default() -> Self {
diff --git a/parquet/src/file/metadata/thrift/mod.rs
b/parquet/src/file/metadata/thrift/mod.rs
index ba707baa76..1477491096 100644
--- a/parquet/src/file/metadata/thrift/mod.rs
+++ b/parquet/src/file/metadata/thrift/mod.rs
@@ -58,6 +58,7 @@ use crate::{
},
thrift_struct,
util::bit_util::FromBytes,
+ write_thrift_field,
};
// this needs to be visible to the schema conversion code
@@ -1521,18 +1522,9 @@ impl WriteThrift for
crate::geospatial::statistics::GeospatialStatistics {
}
}
-impl WriteThriftField for crate::geospatial::statistics::GeospatialStatistics {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
- self.write_thrift(writer)?;
- Ok(field_id)
- }
-}
+// macro cannot handle qualified names
+use crate::geospatial::statistics::GeospatialStatistics as
RustGeospatialStatistics;
+write_thrift_field!(RustGeospatialStatistics, FieldType::Struct);
// struct BoundingBox {
// 1: required double xmin;
@@ -1570,18 +1562,9 @@ impl WriteThrift for
crate::geospatial::bounding_box::BoundingBox {
}
}
-impl WriteThriftField for crate::geospatial::bounding_box::BoundingBox {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?;
- self.write_thrift(writer)?;
- Ok(field_id)
- }
-}
+// macro cannot handle qualified names
+use crate::geospatial::bounding_box::BoundingBox as RustBoundingBox;
+write_thrift_field!(RustBoundingBox, FieldType::Struct);
#[cfg(test)]
pub(crate) mod tests {
diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs
index 22c61df36a..eb8bc2b7f0 100644
--- a/parquet/src/parquet_macros.rs
+++ b/parquet/src/parquet_macros.rs
@@ -293,6 +293,21 @@ macro_rules! thrift_struct {
}
}
+#[doc(hidden)]
+#[macro_export]
+/// Generate `WriteThriftField` implementation for a struct.
+macro_rules! write_thrift_field {
+ ($identifier:ident $(< $lt:lifetime >)?, $fld_type:expr) => {
+ impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? {
+ fn write_thrift_field<W: Write>(&self, writer: &mut
ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) ->
Result<i16> {
+ writer.write_field_begin($fld_type, field_id, last_field_id)?;
+ self.write_thrift(writer)?;
+ Ok(field_id)
+ }
+ }
+ }
+}
+
#[doc(hidden)]
#[macro_export]
macro_rules! __thrift_write_required_or_optional_field {
diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs
index e27c7d16ef..221532ea83 100644
--- a/parquet/src/parquet_thrift.rs
+++ b/parquet/src/parquet_thrift.rs
@@ -31,7 +31,10 @@ use std::{
io::{Read, Write},
};
-use crate::errors::{ParquetError, Result};
+use crate::{
+ errors::{ParquetError, Result},
+ write_thrift_field,
+};
/// Wrapper for thrift `double` fields. This is used to provide
/// an implementation of `Eq` for floats. This implementation
@@ -913,6 +916,7 @@ pub(crate) trait WriteThriftField {
) -> Result<i16>;
}
+// bool struct fields are written differently to bool values
impl WriteThriftField for bool {
fn write_thrift_field<W: Write>(
&self,
@@ -929,83 +933,13 @@ impl WriteThriftField for bool {
}
}
-impl WriteThriftField for i8 {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::Byte, field_id, last_field_id)?;
- writer.write_i8(*self)?;
- Ok(field_id)
- }
-}
-
-impl WriteThriftField for i16 {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::I16, field_id, last_field_id)?;
- writer.write_i16(*self)?;
- Ok(field_id)
- }
-}
-
-impl WriteThriftField for i32 {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
- writer.write_i32(*self)?;
- Ok(field_id)
- }
-}
-
-impl WriteThriftField for i64 {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::I64, field_id, last_field_id)?;
- writer.write_i64(*self)?;
- Ok(field_id)
- }
-}
-
-impl WriteThriftField for OrderedF64 {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::Double, field_id, last_field_id)?;
- writer.write_double(self.0)?;
- Ok(field_id)
- }
-}
-
-impl WriteThriftField for f64 {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::Double, field_id, last_field_id)?;
- writer.write_double(*self)?;
- Ok(field_id)
- }
-}
+write_thrift_field!(i8, FieldType::Byte);
+write_thrift_field!(i16, FieldType::I16);
+write_thrift_field!(i32, FieldType::I32);
+write_thrift_field!(i64, FieldType::I64);
+write_thrift_field!(OrderedF64, FieldType::Double);
+write_thrift_field!(f64, FieldType::Double);
+write_thrift_field!(String, FieldType::Binary);
impl WriteThriftField for &[u8] {
fn write_thrift_field<W: Write>(
@@ -1033,19 +967,6 @@ impl WriteThriftField for &str {
}
}
-impl WriteThriftField for String {
- fn write_thrift_field<W: Write>(
- &self,
- writer: &mut ThriftCompactOutputProtocol<W>,
- field_id: i16,
- last_field_id: i16,
- ) -> Result<i16> {
- writer.write_field_begin(FieldType::Binary, field_id, last_field_id)?;
- writer.write_bytes(self.as_bytes())?;
- Ok(field_id)
- }
-}
-
impl<T> WriteThriftField for Vec<T>
where
T: WriteThrift,