This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new e7c37ded17 Add helper functions to create `LogicalType` struct
variants (#9996)
e7c37ded17 is described below
commit e7c37ded17298172ebfbe349d9044ef100f82e40
Author: Ed Seidl <[email protected]>
AuthorDate: Wed May 20 11:44:15 2026 -0700
Add helper functions to create `LogicalType` struct variants (#9996)
# Which issue does this PR close?
- Part of #9995.
# Rationale for this change
Before switching `LogicalType` from struct variants to tuple variants,
add some helper functions that will hide some of the increase in
complexity.
# What changes are included in this PR?
Adds functions to the `LogicalType` impl for creating instances of the
non-unit variants (`Integer`, `Decimal`, `Time`, `Timestamp`, `Variant`,
`Geometry`, `Geography`).
# Are these changes tested?
Should be covered by existing tests.
# Are there any user-facing changes?
Adds to the `LogicalType` API
---
parquet/src/arrow/schema/extension.rs | 18 +-
parquet/src/arrow/schema/mod.rs | 59 ++----
parquet/src/basic.rs | 382 ++++++++++++----------------------
parquet/src/column/writer/mod.rs | 5 +-
parquet/src/file/writer.rs | 5 +-
parquet/src/geospatial/accumulator.rs | 7 +-
parquet/src/schema/parser.rs | 57 ++---
parquet/src/schema/printer.rs | 71 ++-----
parquet/src/schema/types.rs | 20 +-
parquet/src/variant.rs | 4 +-
parquet_derive/src/parquet_field.rs | 40 +---
11 files changed, 209 insertions(+), 459 deletions(-)
diff --git a/parquet/src/arrow/schema/extension.rs
b/parquet/src/arrow/schema/extension.rs
index a2e9c32dee..0244c1b6bb 100644
--- a/parquet/src/arrow/schema/extension.rs
+++ b/parquet/src/arrow/schema/extension.rs
@@ -112,9 +112,7 @@ pub(crate) fn has_extension_type(parquet_type: &Type) ->
bool {
pub(crate) fn logical_type_for_struct(field: &Field) -> Option<LogicalType> {
use parquet_variant_compute::VariantType;
if field.has_valid_extension_type::<VariantType>() {
- Some(LogicalType::Variant {
- specification_version: None,
- })
+ Some(LogicalType::variant(None))
} else {
None
}
@@ -167,13 +165,13 @@ pub(crate) fn logical_type_for_binary(field: &Field) ->
Option<LogicalType> {
match field.extension_type_name() {
Some(n) if n == WkbType::NAME => match
field.try_extension_type::<WkbType>() {
Ok(wkb_type) => match wkb_type.metadata().type_hint() {
- WkbTypeHint::Geometry => Some(LogicalType::Geometry {
- crs: wkb_type.metadata().crs.as_ref().map(|c|
c.to_string()),
- }),
- WkbTypeHint::Geography => Some(LogicalType::Geography {
- crs: wkb_type.metadata().crs.as_ref().map(|c|
c.to_string()),
- algorithm: wkb_type.metadata().algorithm.map(|a| a.into()),
- }),
+ WkbTypeHint::Geometry => Some(LogicalType::geometry(
+ wkb_type.metadata().crs.as_ref().map(|c| c.to_string()),
+ )),
+ WkbTypeHint::Geography => Some(LogicalType::geography(
+ wkb_type.metadata().crs.as_ref().map(|c| c.to_string()),
+ wkb_type.metadata().algorithm.map(|a| a.into()),
+ )),
},
Err(_e) => None,
},
diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs
index b2b93687ba..7fe6fbc9d9 100644
--- a/parquet/src/arrow/schema/mod.rs
+++ b/parquet/src/arrow/schema/mod.rs
@@ -556,18 +556,12 @@ fn arrow_to_parquet_type(field: &Field, coerce_types:
bool) -> Result<Type> {
.with_id(id)
.build(),
DataType::Int8 => Type::primitive_type_builder(name,
PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::Integer {
- bit_width: 8,
- is_signed: true,
- }))
+ .with_logical_type(Some(LogicalType::integer(8, true)))
.with_repetition(repetition)
.with_id(id)
.build(),
DataType::Int16 => Type::primitive_type_builder(name,
PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::Integer {
- bit_width: 16,
- is_signed: true,
- }))
+ .with_logical_type(Some(LogicalType::integer(16, true)))
.with_repetition(repetition)
.with_id(id)
.build(),
@@ -580,34 +574,22 @@ fn arrow_to_parquet_type(field: &Field, coerce_types:
bool) -> Result<Type> {
.with_id(id)
.build(),
DataType::UInt8 => Type::primitive_type_builder(name,
PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::Integer {
- bit_width: 8,
- is_signed: false,
- }))
+ .with_logical_type(Some(LogicalType::integer(8, false)))
.with_repetition(repetition)
.with_id(id)
.build(),
DataType::UInt16 => Type::primitive_type_builder(name,
PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::Integer {
- bit_width: 16,
- is_signed: false,
- }))
+ .with_logical_type(Some(LogicalType::integer(16, false)))
.with_repetition(repetition)
.with_id(id)
.build(),
DataType::UInt32 => Type::primitive_type_builder(name,
PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::Integer {
- bit_width: 32,
- is_signed: false,
- }))
+ .with_logical_type(Some(LogicalType::integer(32, false)))
.with_repetition(repetition)
.with_id(id)
.build(),
DataType::UInt64 => Type::primitive_type_builder(name,
PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::Integer {
- bit_width: 64,
- is_signed: false,
- }))
+ .with_logical_type(Some(LogicalType::integer(64, false)))
.with_repetition(repetition)
.with_id(id)
.build(),
@@ -634,16 +616,16 @@ fn arrow_to_parquet_type(field: &Field, coerce_types:
bool) -> Result<Type> {
}
DataType::Timestamp(time_unit, tz) => {
Type::primitive_type_builder(name, PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::Timestamp {
+ .with_logical_type(Some(LogicalType::timestamp(
// If timezone set, values are normalized to UTC timezone
- is_adjusted_to_u_t_c: matches!(tz, Some(z) if
!z.as_ref().is_empty()),
- unit: match time_unit {
+ matches!(tz, Some(z) if !z.as_ref().is_empty()),
+ match time_unit {
TimeUnit::Second => unreachable!(),
TimeUnit::Millisecond => ParquetTimeUnit::MILLIS,
TimeUnit::Microsecond => ParquetTimeUnit::MICROS,
TimeUnit::Nanosecond => ParquetTimeUnit::NANOS,
},
- }))
+ )))
.with_repetition(repetition)
.with_id(id)
.build()
@@ -675,25 +657,25 @@ fn arrow_to_parquet_type(field: &Field, coerce_types:
bool) -> Result<Type> {
.build()
}
DataType::Time32(unit) => Type::primitive_type_builder(name,
PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::Time {
- is_adjusted_to_u_t_c:
field.metadata().contains_key("adjusted_to_utc"),
- unit: match unit {
+ .with_logical_type(Some(LogicalType::time(
+ field.metadata().contains_key("adjusted_to_utc"),
+ match unit {
TimeUnit::Millisecond => ParquetTimeUnit::MILLIS,
u => unreachable!("Invalid unit for Time32: {:?}", u),
},
- }))
+ )))
.with_repetition(repetition)
.with_id(id)
.build(),
DataType::Time64(unit) => Type::primitive_type_builder(name,
PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::Time {
- is_adjusted_to_u_t_c:
field.metadata().contains_key("adjusted_to_utc"),
- unit: match unit {
+ .with_logical_type(Some(LogicalType::time(
+ field.metadata().contains_key("adjusted_to_utc"),
+ match unit {
TimeUnit::Microsecond => ParquetTimeUnit::MICROS,
TimeUnit::Nanosecond => ParquetTimeUnit::NANOS,
u => unreachable!("Invalid unit for Time64: {:?}", u),
},
- }))
+ )))
.with_repetition(repetition)
.with_id(id)
.build(),
@@ -749,10 +731,7 @@ fn arrow_to_parquet_type(field: &Field, coerce_types:
bool) -> Result<Type> {
.with_repetition(repetition)
.with_id(id)
.with_length(length)
- .with_logical_type(Some(LogicalType::Decimal {
- scale: *scale as i32,
- precision: *precision as i32,
- }))
+ .with_logical_type(Some(LogicalType::decimal(*scale as i32,
*precision as i32)))
.with_precision(*precision as i32)
.with_scale(*scale as i32)
.build()
diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs
index 17e5b9c321..796779358c 100644
--- a/parquet/src/basic.rs
+++ b/parquet/src/basic.rs
@@ -314,6 +314,54 @@ pub enum LogicalType {
},
}
+impl LogicalType {
+ /// Create a [`LogicalType::Integer`] variant with the given `bit_width`
and `is_signed`
+ pub fn integer(bit_width: i8, is_signed: bool) -> Self {
+ Self::Integer {
+ bit_width,
+ is_signed,
+ }
+ }
+
+ /// Create a [`LogicalType::Decimal`] variant with the given `scale` and
`precision`
+ pub fn decimal(scale: i32, precision: i32) -> Self {
+ Self::Decimal { scale, precision }
+ }
+
+ /// Create a [`LogicalType::Time`] variant with the given
`is_adjusted_to_u_t_c` and `unit`
+ pub fn time(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> Self {
+ Self::Time {
+ is_adjusted_to_u_t_c,
+ unit,
+ }
+ }
+
+ /// Create a [`LogicalType::Timestamp`] variant with the given
`is_adjusted_to_u_t_c` and `unit`
+ pub fn timestamp(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> Self {
+ Self::Timestamp {
+ is_adjusted_to_u_t_c,
+ unit,
+ }
+ }
+
+ /// Create a [`LogicalType::Variant`] variant with the given
`specification_version`
+ pub fn variant(specification_version: Option<i8>) -> Self {
+ Self::Variant {
+ specification_version,
+ }
+ }
+
+ /// Create a [`LogicalType::Geometry`] variant with the given `crs`
+ pub fn geometry(crs: Option<String>) -> Self {
+ Self::Geometry { crs }
+ }
+
+ /// Create a [`LogicalType::Geography`] variant with the given `crs` and
`algorithm`
+ pub fn geography(crs: Option<String>, algorithm:
Option<EdgeInterpolationAlgorithm>) -> Self {
+ Self::Geography { crs, algorithm }
+ }
+}
+
impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType {
fn read_thrift(prot: &mut R) -> Result<Self> {
let field_ident = prot.read_field_begin(0)?;
@@ -339,10 +387,7 @@ impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a,
R> for LogicalType {
}
5 => {
let val = DecimalType::read_thrift(&mut *prot)?;
- Self::Decimal {
- scale: val.scale,
- precision: val.precision,
- }
+ Self::decimal(val.scale, val.precision)
}
6 => {
prot.skip_empty_struct()?;
@@ -350,24 +395,15 @@ impl<'a, R: ThriftCompactInputProtocol<'a>>
ReadThrift<'a, R> for LogicalType {
}
7 => {
let val = TimeType::read_thrift(&mut *prot)?;
- Self::Time {
- is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
- unit: val.unit,
- }
+ Self::time(val.is_adjusted_to_u_t_c, val.unit)
}
8 => {
let val = TimestampType::read_thrift(&mut *prot)?;
- Self::Timestamp {
- is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c,
- unit: val.unit,
- }
+ Self::timestamp(val.is_adjusted_to_u_t_c, val.unit)
}
10 => {
let val = IntType::read_thrift(&mut *prot)?;
- Self::Integer {
- is_signed: val.is_signed,
- bit_width: val.bit_width,
- }
+ Self::integer(val.bit_width, val.is_signed)
}
11 => {
prot.skip_empty_struct()?;
@@ -391,15 +427,11 @@ impl<'a, R: ThriftCompactInputProtocol<'a>>
ReadThrift<'a, R> for LogicalType {
}
16 => {
let val = VariantType::read_thrift(&mut *prot)?;
- Self::Variant {
- specification_version: val.specification_version,
- }
+ Self::variant(val.specification_version)
}
17 => {
let val = GeometryType::read_thrift(&mut *prot)?;
- Self::Geometry {
- crs: val.crs.map(|s| s.to_owned()),
- }
+ Self::geometry(val.crs.map(|s| s.to_owned()))
}
18 => {
let val = GeographyType::read_thrift(&mut *prot)?;
@@ -408,10 +440,7 @@ impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a,
R> for LogicalType {
let algorithm = val
.algorithm
.unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL);
- Self::Geography {
- crs: val.crs.map(|s| s.to_owned()),
- algorithm: Some(algorithm),
- }
+ Self::geography(val.crs.map(|s| s.to_owned()), Some(algorithm))
}
_ => {
prot.skip(field_ident.field_type)?;
@@ -1511,26 +1540,14 @@ impl str::FromStr for LogicalType {
fn from_str(s: &str) -> Result<Self> {
match s {
// The type is a placeholder that gets updated elsewhere
- "INTEGER" => Ok(LogicalType::Integer {
- bit_width: 8,
- is_signed: false,
- }),
+ "INTEGER" => Ok(LogicalType::integer(8, false)),
"MAP" => Ok(LogicalType::Map),
"LIST" => Ok(LogicalType::List),
"ENUM" => Ok(LogicalType::Enum),
- "DECIMAL" => Ok(LogicalType::Decimal {
- precision: -1,
- scale: -1,
- }),
+ "DECIMAL" => Ok(LogicalType::decimal(-1, -1)),
"DATE" => Ok(LogicalType::Date),
- "TIME" => Ok(LogicalType::Time {
- is_adjusted_to_u_t_c: false,
- unit: TimeUnit::MILLIS,
- }),
- "TIMESTAMP" => Ok(LogicalType::Timestamp {
- is_adjusted_to_u_t_c: false,
- unit: TimeUnit::MILLIS,
- }),
+ "TIME" => Ok(LogicalType::time(false, TimeUnit::MILLIS)),
+ "TIMESTAMP" => Ok(LogicalType::timestamp(false, TimeUnit::MILLIS)),
"STRING" => Ok(LogicalType::String),
"JSON" => Ok(LogicalType::Json),
"BSON" => Ok(LogicalType::Bson),
@@ -1540,11 +1557,12 @@ impl str::FromStr for LogicalType {
"Interval parquet logical type not yet supported"
)),
"FLOAT16" => Ok(LogicalType::Float16),
- "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }),
- "GEOGRAPHY" => Ok(LogicalType::Geography {
- crs: None,
- algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
- }),
+ "VARIANT" => Ok(LogicalType::variant(None)),
+ "GEOMETRY" => Ok(LogicalType::geometry(None)),
+ "GEOGRAPHY" => Ok(LogicalType::geography(
+ None,
+ Some(EdgeInterpolationAlgorithm::SPHERICAL),
+ )),
other => Err(general_err!("Invalid parquet logical type {}",
other)),
}
}
@@ -1860,10 +1878,7 @@ mod tests {
let logical_none: Option<LogicalType> = None;
assert_eq!(ConvertedType::from(logical_none), ConvertedType::NONE);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Decimal {
- precision: 20,
- scale: 5
- })),
+ ConvertedType::from(Some(LogicalType::decimal(5, 20))),
ConvertedType::DECIMAL
);
assert_eq!(
@@ -1883,101 +1898,59 @@ mod tests {
ConvertedType::DATE
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Time {
- unit: TimeUnit::MILLIS,
- is_adjusted_to_u_t_c: true,
- })),
+ ConvertedType::from(Some(LogicalType::time(true,
TimeUnit::MILLIS))),
ConvertedType::TIME_MILLIS
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Time {
- unit: TimeUnit::MICROS,
- is_adjusted_to_u_t_c: true,
- })),
+ ConvertedType::from(Some(LogicalType::time(true,
TimeUnit::MICROS))),
ConvertedType::TIME_MICROS
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Time {
- unit: TimeUnit::NANOS,
- is_adjusted_to_u_t_c: false,
- })),
+ ConvertedType::from(Some(LogicalType::time(false,
TimeUnit::NANOS))),
ConvertedType::NONE
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Timestamp {
- unit: TimeUnit::MILLIS,
- is_adjusted_to_u_t_c: true,
- })),
+ ConvertedType::from(Some(LogicalType::timestamp(true,
TimeUnit::MILLIS))),
ConvertedType::TIMESTAMP_MILLIS
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Timestamp {
- unit: TimeUnit::MICROS,
- is_adjusted_to_u_t_c: false,
- })),
+ ConvertedType::from(Some(LogicalType::timestamp(false,
TimeUnit::MICROS))),
ConvertedType::TIMESTAMP_MICROS
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Timestamp {
- unit: TimeUnit::NANOS,
- is_adjusted_to_u_t_c: false,
- })),
+ ConvertedType::from(Some(LogicalType::timestamp(false,
TimeUnit::NANOS))),
ConvertedType::NONE
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Integer {
- bit_width: 8,
- is_signed: false
- })),
+ ConvertedType::from(Some(LogicalType::integer(8, false))),
ConvertedType::UINT_8
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Integer {
- bit_width: 8,
- is_signed: true
- })),
+ ConvertedType::from(Some(LogicalType::integer(8, true))),
ConvertedType::INT_8
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Integer {
- bit_width: 16,
- is_signed: false
- })),
+ ConvertedType::from(Some(LogicalType::integer(16, false))),
ConvertedType::UINT_16
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Integer {
- bit_width: 16,
- is_signed: true
- })),
+ ConvertedType::from(Some(LogicalType::integer(16, true))),
ConvertedType::INT_16
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Integer {
- bit_width: 32,
- is_signed: false
- })),
+ ConvertedType::from(Some(LogicalType::integer(32, false))),
ConvertedType::UINT_32
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Integer {
- bit_width: 32,
- is_signed: true
- })),
+ ConvertedType::from(Some(LogicalType::integer(32, true))),
ConvertedType::INT_32
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Integer {
- bit_width: 64,
- is_signed: false
- })),
+ ConvertedType::from(Some(LogicalType::integer(64, false))),
ConvertedType::UINT_64
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Integer {
- bit_width: 64,
- is_signed: true
- })),
+ ConvertedType::from(Some(LogicalType::integer(64, true))),
ConvertedType::INT_64
);
assert_eq!(
@@ -2001,14 +1974,15 @@ mod tests {
ConvertedType::NONE
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Geometry { crs: None })),
+ ConvertedType::from(Some(LogicalType::variant(None))),
ConvertedType::NONE
);
assert_eq!(
- ConvertedType::from(Some(LogicalType::Geography {
- crs: None,
- algorithm: Some(EdgeInterpolationAlgorithm::default()),
- })),
+ ConvertedType::from(Some(LogicalType::geometry(None))),
+ ConvertedType::NONE
+ );
+ assert_eq!(
+ ConvertedType::from(Some(LogicalType::geography(None,
Some(Default::default())))),
ConvertedType::NONE
);
assert_eq!(
@@ -2023,81 +1997,42 @@ mod tests {
test_roundtrip(LogicalType::Map);
test_roundtrip(LogicalType::List);
test_roundtrip(LogicalType::Enum);
- test_roundtrip(LogicalType::Decimal {
- scale: 0,
- precision: 20,
- });
+ test_roundtrip(LogicalType::decimal(0, 20));
test_roundtrip(LogicalType::Date);
- test_roundtrip(LogicalType::Time {
- is_adjusted_to_u_t_c: true,
- unit: TimeUnit::MICROS,
- });
- test_roundtrip(LogicalType::Time {
- is_adjusted_to_u_t_c: false,
- unit: TimeUnit::MILLIS,
- });
- test_roundtrip(LogicalType::Time {
- is_adjusted_to_u_t_c: false,
- unit: TimeUnit::NANOS,
- });
- test_roundtrip(LogicalType::Timestamp {
- is_adjusted_to_u_t_c: false,
- unit: TimeUnit::MICROS,
- });
- test_roundtrip(LogicalType::Timestamp {
- is_adjusted_to_u_t_c: true,
- unit: TimeUnit::MILLIS,
- });
- test_roundtrip(LogicalType::Timestamp {
- is_adjusted_to_u_t_c: true,
- unit: TimeUnit::NANOS,
- });
- test_roundtrip(LogicalType::Integer {
- bit_width: 8,
- is_signed: true,
- });
- test_roundtrip(LogicalType::Integer {
- bit_width: 16,
- is_signed: false,
- });
- test_roundtrip(LogicalType::Integer {
- bit_width: 32,
- is_signed: true,
- });
- test_roundtrip(LogicalType::Integer {
- bit_width: 64,
- is_signed: false,
- });
+ test_roundtrip(LogicalType::time(true, TimeUnit::MICROS));
+ test_roundtrip(LogicalType::time(false, TimeUnit::MILLIS));
+ test_roundtrip(LogicalType::time(false, TimeUnit::NANOS));
+ test_roundtrip(LogicalType::timestamp(false, TimeUnit::MICROS));
+ test_roundtrip(LogicalType::timestamp(true, TimeUnit::MILLIS));
+ test_roundtrip(LogicalType::timestamp(true, TimeUnit::NANOS));
+ test_roundtrip(LogicalType::integer(8, true));
+ test_roundtrip(LogicalType::integer(16, false));
+ test_roundtrip(LogicalType::integer(32, true));
+ test_roundtrip(LogicalType::integer(64, false));
test_roundtrip(LogicalType::Json);
test_roundtrip(LogicalType::Bson);
test_roundtrip(LogicalType::Uuid);
test_roundtrip(LogicalType::Float16);
- test_roundtrip(LogicalType::Variant {
- specification_version: Some(1),
- });
- test_roundtrip(LogicalType::Variant {
- specification_version: None,
- });
- test_roundtrip(LogicalType::Geometry {
- crs: Some("foo".to_owned()),
- });
- test_roundtrip(LogicalType::Geometry { crs: None });
- test_roundtrip(LogicalType::Geography {
- crs: Some("foo".to_owned()),
- algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER),
- });
- test_roundtrip(LogicalType::Geography {
- crs: None,
- algorithm: Some(EdgeInterpolationAlgorithm::KARNEY),
- });
- test_roundtrip(LogicalType::Geography {
- crs: Some("foo".to_owned()),
- algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
- });
- test_roundtrip(LogicalType::Geography {
- crs: None,
- algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
- });
+ test_roundtrip(LogicalType::variant(Some(1)));
+ test_roundtrip(LogicalType::variant(None));
+ test_roundtrip(LogicalType::geometry(Some("foo".to_owned())));
+ test_roundtrip(LogicalType::geometry(None));
+ test_roundtrip(LogicalType::geography(
+ Some("foo".to_owned()),
+ Some(EdgeInterpolationAlgorithm::ANDOYER),
+ ));
+ test_roundtrip(LogicalType::geography(
+ None,
+ Some(EdgeInterpolationAlgorithm::KARNEY),
+ ));
+ test_roundtrip(LogicalType::geography(
+ Some("foo".to_owned()),
+ Some(EdgeInterpolationAlgorithm::SPHERICAL),
+ ));
+ test_roundtrip(LogicalType::geography(
+ None,
+ Some(EdgeInterpolationAlgorithm::SPHERICAL),
+ ));
}
#[test]
@@ -2291,72 +2226,27 @@ mod tests {
LogicalType::Bson,
LogicalType::Enum,
LogicalType::Uuid,
- LogicalType::Integer {
- bit_width: 8,
- is_signed: false,
- },
- LogicalType::Integer {
- bit_width: 16,
- is_signed: false,
- },
- LogicalType::Integer {
- bit_width: 32,
- is_signed: false,
- },
- LogicalType::Integer {
- bit_width: 64,
- is_signed: false,
- },
+ LogicalType::integer(8, false),
+ LogicalType::integer(16, false),
+ LogicalType::integer(32, false),
+ LogicalType::integer(64, false),
];
check_sort_order(unsigned, SortOrder::UNSIGNED);
// Signed comparison (physical type does not matter)
let signed = vec![
- LogicalType::Integer {
- bit_width: 8,
- is_signed: true,
- },
- LogicalType::Integer {
- bit_width: 8,
- is_signed: true,
- },
- LogicalType::Integer {
- bit_width: 8,
- is_signed: true,
- },
- LogicalType::Integer {
- bit_width: 8,
- is_signed: true,
- },
- LogicalType::Decimal {
- scale: 20,
- precision: 4,
- },
+ LogicalType::integer(8, true),
+ LogicalType::integer(16, true),
+ LogicalType::integer(32, true),
+ LogicalType::integer(64, true),
+ LogicalType::decimal(20, 4),
LogicalType::Date,
- LogicalType::Time {
- is_adjusted_to_u_t_c: false,
- unit: TimeUnit::MILLIS,
- },
- LogicalType::Time {
- is_adjusted_to_u_t_c: false,
- unit: TimeUnit::MICROS,
- },
- LogicalType::Time {
- is_adjusted_to_u_t_c: true,
- unit: TimeUnit::NANOS,
- },
- LogicalType::Timestamp {
- is_adjusted_to_u_t_c: false,
- unit: TimeUnit::MILLIS,
- },
- LogicalType::Timestamp {
- is_adjusted_to_u_t_c: false,
- unit: TimeUnit::MICROS,
- },
- LogicalType::Timestamp {
- is_adjusted_to_u_t_c: true,
- unit: TimeUnit::NANOS,
- },
+ LogicalType::time(false, TimeUnit::MILLIS),
+ LogicalType::time(false, TimeUnit::MICROS),
+ LogicalType::time(true, TimeUnit::NANOS),
+ LogicalType::timestamp(false, TimeUnit::MILLIS),
+ LogicalType::timestamp(false, TimeUnit::MICROS),
+ LogicalType::timestamp(true, TimeUnit::NANOS),
LogicalType::Float16,
];
check_sort_order(signed, SortOrder::SIGNED);
@@ -2365,11 +2255,9 @@ mod tests {
let undefined = vec![
LogicalType::List,
LogicalType::Map,
- LogicalType::Geometry { crs: None },
- LogicalType::Geography {
- crs: None,
- algorithm: Some(EdgeInterpolationAlgorithm::default()),
- },
+ LogicalType::variant(None),
+ LogicalType::geometry(None),
+ LogicalType::geography(None, Some(Default::default())),
];
check_sort_order(undefined, SortOrder::UNDEFINED);
}
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 5d14ac6856..595eadbc90 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -4449,10 +4449,7 @@ mod tests {
let path = ColumnPath::from("col");
let tpe = SchemaType::primitive_type_builder("col",
T::get_physical_type())
.with_length(16)
- .with_logical_type(Some(LogicalType::Decimal {
- scale: 2,
- precision: 3,
- }))
+ .with_logical_type(Some(LogicalType::decimal(2, 3)))
.with_scale(2)
.with_precision(3)
.build()
diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs
index ec4d02a47d..942013ea62 100644
--- a/parquet/src/file/writer.rs
+++ b/parquet/src/file/writer.rs
@@ -1280,10 +1280,7 @@ mod tests {
#[test]
fn test_file_writer_v2_with_metadata() {
let file = tempfile::tempfile().unwrap();
- let field_logical_type = Some(LogicalType::Integer {
- bit_width: 8,
- is_signed: false,
- });
+ let field_logical_type = Some(LogicalType::integer(8, false));
let field = Arc::new(
types::Type::primitive_type_builder("col1", Type::INT32)
.with_logical_type(field_logical_type.clone())
diff --git a/parquet/src/geospatial/accumulator.rs
b/parquet/src/geospatial/accumulator.rs
index d25a47930f..3aad1060e0 100644
--- a/parquet/src/geospatial/accumulator.rs
+++ b/parquet/src/geospatial/accumulator.rs
@@ -253,7 +253,7 @@ mod test {
// Check that we have a working accumulator for Geometry
let parquet_type = Type::primitive_type_builder("geom",
crate::basic::Type::BYTE_ARRAY)
- .with_logical_type(Some(LogicalType::Geometry { crs: None }))
+ .with_logical_type(Some(LogicalType::geometry(None)))
.build()
.unwrap();
let column_descr =
@@ -271,10 +271,7 @@ mod test {
// Check that we have a void accumulator for Geography
let parquet_type = Type::primitive_type_builder("geom",
crate::basic::Type::BYTE_ARRAY)
- .with_logical_type(Some(LogicalType::Geography {
- crs: None,
- algorithm: None,
- }))
+ .with_logical_type(Some(LogicalType::geography(None, None)))
.build()
.unwrap();
let column_descr =
diff --git a/parquet/src/schema/parser.rs b/parquet/src/schema/parser.rs
index 36cf2dc517..071962aa4e 100644
--- a/parquet/src/schema/parser.rs
+++ b/parquet/src/schema/parser.rs
@@ -353,7 +353,7 @@ impl Parser<'_> {
} else {
scale = 0
}
- logical = Some(LogicalType::Decimal { scale,
precision });
+ logical = Some(LogicalType::decimal(scale,
precision));
converted = ConvertedType::from(logical.clone());
}
}
@@ -371,10 +371,7 @@ impl Parser<'_> {
"Failed to parse timezone info for TIME
type",
)?;
assert_token(self.tokenizer.next(), ")")?;
- logical = Some(LogicalType::Time {
- is_adjusted_to_u_t_c,
- unit,
- });
+ logical =
Some(LogicalType::time(is_adjusted_to_u_t_c, unit));
converted =
ConvertedType::from(logical.clone());
} else {
// Invalid token for unit
@@ -396,10 +393,7 @@ impl Parser<'_> {
"Failed to parse timezone info for
TIMESTAMP type",
)?;
assert_token(self.tokenizer.next(), ")")?;
- logical = Some(LogicalType::Timestamp {
- is_adjusted_to_u_t_c,
- unit,
- });
+ logical =
Some(LogicalType::timestamp(is_adjusted_to_u_t_c, unit));
converted =
ConvertedType::from(logical.clone());
} else {
// Invalid token for unit
@@ -446,10 +440,7 @@ impl Parser<'_> {
"Failed to parse is_signed for INTEGER
type",
)?;
assert_token(self.tokenizer.next(), ")")?;
- logical = Some(LogicalType::Integer {
- bit_width,
- is_signed,
- });
+ logical = Some(LogicalType::integer(bit_width,
is_signed));
converted =
ConvertedType::from(logical.clone());
} else {
// Invalid token for unit
@@ -833,10 +824,7 @@ mod tests {
.with_fields(vec![
Arc::new(
Type::primitive_type_builder("f1",
PhysicalType::FIXED_LEN_BYTE_ARRAY)
- .with_logical_type(Some(LogicalType::Decimal {
- precision: 9,
- scale: 3,
- }))
+ .with_logical_type(Some(LogicalType::decimal(3, 9)))
.with_converted_type(ConvertedType::DECIMAL)
.with_length(5)
.with_precision(9)
@@ -846,10 +834,7 @@ mod tests {
),
Arc::new(
Type::primitive_type_builder("f2",
PhysicalType::FIXED_LEN_BYTE_ARRAY)
- .with_logical_type(Some(LogicalType::Decimal {
- precision: 38,
- scale: 18,
- }))
+ .with_logical_type(Some(LogicalType::decimal(18, 38)))
.with_converted_type(ConvertedType::DECIMAL)
.with_length(16)
.with_precision(38)
@@ -1038,20 +1023,14 @@ mod tests {
Arc::new(
Type::primitive_type_builder("_1", PhysicalType::INT32)
.with_repetition(Repetition::REQUIRED)
- .with_logical_type(Some(LogicalType::Integer {
- bit_width: 8,
- is_signed: true,
- }))
+ .with_logical_type(Some(LogicalType::integer(8, true)))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_2", PhysicalType::INT32)
.with_repetition(Repetition::REQUIRED)
- .with_logical_type(Some(LogicalType::Integer {
- bit_width: 16,
- is_signed: false,
- }))
+ .with_logical_type(Some(LogicalType::integer(16, false)))
.build()
.unwrap(),
),
@@ -1075,37 +1054,25 @@ mod tests {
),
Arc::new(
Type::primitive_type_builder("_6", PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::Time {
- unit: TimeUnit::MILLIS,
- is_adjusted_to_u_t_c: false,
- }))
+ .with_logical_type(Some(LogicalType::time(false,
TimeUnit::MILLIS)))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_7", PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::Time {
- unit: TimeUnit::MICROS,
- is_adjusted_to_u_t_c: true,
- }))
+ .with_logical_type(Some(LogicalType::time(true,
TimeUnit::MICROS)))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_8", PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::Timestamp {
- unit: TimeUnit::MILLIS,
- is_adjusted_to_u_t_c: true,
- }))
+ .with_logical_type(Some(LogicalType::timestamp(true,
TimeUnit::MILLIS)))
.build()
.unwrap(),
),
Arc::new(
Type::primitive_type_builder("_9", PhysicalType::INT64)
- .with_logical_type(Some(LogicalType::Timestamp {
- unit: TimeUnit::NANOS,
- is_adjusted_to_u_t_c: false,
- }))
+ .with_logical_type(Some(LogicalType::timestamp(false,
TimeUnit::NANOS)))
.build()
.unwrap(),
),
diff --git a/parquet/src/schema/printer.rs b/parquet/src/schema/printer.rs
index 68398005b6..dbeddcfc12 100644
--- a/parquet/src/schema/printer.rs
+++ b/parquet/src/schema/printer.rs
@@ -457,7 +457,7 @@ mod tests {
use std::sync::Arc;
- use crate::basic::{EdgeInterpolationAlgorithm, Repetition, Type as
PhysicalType};
+ use crate::basic::{Repetition, Type as PhysicalType};
use crate::errors::Result;
use crate::schema::parser::parse_message_type;
@@ -543,10 +543,7 @@ mod tests {
"field",
None,
PhysicalType::INT32,
- Some(LogicalType::Integer {
- bit_width: 32,
- is_signed: true,
- }),
+ Some(LogicalType::integer(32, true)),
ConvertedType::NONE,
Repetition::REQUIRED,
)
@@ -558,10 +555,7 @@ mod tests {
"field",
None,
PhysicalType::INT32,
- Some(LogicalType::Integer {
- bit_width: 8,
- is_signed: false,
- }),
+ Some(LogicalType::integer(8, false)),
ConvertedType::NONE,
Repetition::OPTIONAL,
)
@@ -573,10 +567,7 @@ mod tests {
"field",
None,
PhysicalType::INT32,
- Some(LogicalType::Integer {
- bit_width: 16,
- is_signed: true,
- }),
+ Some(LogicalType::integer(16, true)),
ConvertedType::INT_16,
Repetition::REPEATED,
)
@@ -588,10 +579,7 @@ mod tests {
"field",
Some(42),
PhysicalType::INT32,
- Some(LogicalType::Integer {
- bit_width: 16,
- is_signed: true,
- }),
+ Some(LogicalType::integer(16, true)),
ConvertedType::INT_16,
Repetition::REPEATED,
)
@@ -651,10 +639,7 @@ mod tests {
"field",
None,
PhysicalType::INT64,
- Some(LogicalType::Timestamp {
- is_adjusted_to_u_t_c: true,
- unit: TimeUnit::MILLIS,
- }),
+ Some(LogicalType::timestamp(true, TimeUnit::MILLIS)),
ConvertedType::NONE,
Repetition::REQUIRED,
)
@@ -678,10 +663,7 @@ mod tests {
"field",
None,
PhysicalType::INT32,
- Some(LogicalType::Time {
- unit: TimeUnit::MILLIS,
- is_adjusted_to_u_t_c: false,
- }),
+ Some(LogicalType::time(false, TimeUnit::MILLIS)),
ConvertedType::TIME_MILLIS,
Repetition::REQUIRED,
)
@@ -693,10 +675,7 @@ mod tests {
"field",
Some(42),
PhysicalType::INT32,
- Some(LogicalType::Time {
- unit: TimeUnit::MILLIS,
- is_adjusted_to_u_t_c: false,
- }),
+ Some(LogicalType::time(false, TimeUnit::MILLIS)),
ConvertedType::TIME_MILLIS,
Repetition::REQUIRED,
)
@@ -792,7 +771,7 @@ mod tests {
"field",
None,
PhysicalType::BYTE_ARRAY,
- Some(LogicalType::Geometry { crs: None }),
+ Some(LogicalType::geometry(None)),
ConvertedType::NONE,
Repetition::REQUIRED,
)
@@ -804,9 +783,7 @@ mod tests {
"field",
None,
PhysicalType::BYTE_ARRAY,
- Some(LogicalType::Geometry {
- crs: Some("non-missing CRS".to_string()),
- }),
+ Some(LogicalType::geometry(Some("non-missing
CRS".to_string()))),
ConvertedType::NONE,
Repetition::REQUIRED,
)
@@ -818,10 +795,7 @@ mod tests {
"field",
None,
PhysicalType::BYTE_ARRAY,
- Some(LogicalType::Geography {
- crs: None,
- algorithm: Some(EdgeInterpolationAlgorithm::default()),
- }),
+ Some(LogicalType::geography(None,
Some(Default::default()))),
ConvertedType::NONE,
Repetition::REQUIRED,
)
@@ -833,10 +807,10 @@ mod tests {
"field",
None,
PhysicalType::BYTE_ARRAY,
- Some(LogicalType::Geography {
- crs: Some("non-missing CRS".to_string()),
- algorithm: Some(EdgeInterpolationAlgorithm::default()),
- }),
+ Some(LogicalType::geography(
+ Some("non-missing CRS".to_string()),
+ Some(Default::default()),
+ )),
ConvertedType::NONE,
Repetition::REQUIRED,
)
@@ -887,10 +861,7 @@ mod tests {
),
(
Type::primitive_type_builder("decimal",
PhysicalType::FIXED_LEN_BYTE_ARRAY)
- .with_logical_type(Some(LogicalType::Decimal {
- precision: 32,
- scale: 20,
- }))
+ .with_logical_type(Some(LogicalType::decimal(20, 32)))
.with_precision(32)
.with_scale(20)
.with_length(decimal_length_from_precision(32))
@@ -1178,10 +1149,7 @@ mod tests {
fn test_print_and_parse_decimal() {
let f1 = Type::primitive_type_builder("f1", PhysicalType::INT32)
.with_repetition(Repetition::OPTIONAL)
- .with_logical_type(Some(LogicalType::Decimal {
- precision: 9,
- scale: 2,
- }))
+ .with_logical_type(Some(LogicalType::decimal(2, 9)))
.with_converted_type(ConvertedType::DECIMAL)
.with_precision(9)
.with_scale(2)
@@ -1190,10 +1158,7 @@ mod tests {
let f2 = Type::primitive_type_builder("f2", PhysicalType::INT32)
.with_repetition(Repetition::OPTIONAL)
- .with_logical_type(Some(LogicalType::Decimal {
- precision: 9,
- scale: 0,
- }))
+ .with_logical_type(Some(LogicalType::decimal(0, 9)))
.with_converted_type(ConvertedType::DECIMAL)
.with_precision(9)
.with_scale(0)
diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index 0d504e16fc..d8b3456d47 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -1449,10 +1449,7 @@ mod tests {
#[test]
fn test_primitive_type() {
let mut result = Type::primitive_type_builder("foo",
PhysicalType::INT32)
- .with_logical_type(Some(LogicalType::Integer {
- bit_width: 32,
- is_signed: true,
- }))
+ .with_logical_type(Some(LogicalType::integer(32, true)))
.with_id(Some(0))
.build();
assert!(result.is_ok());
@@ -1464,10 +1461,7 @@ mod tests {
assert_eq!(basic_info.repetition(), Repetition::OPTIONAL);
assert_eq!(
basic_info.logical_type_ref(),
- Some(&LogicalType::Integer {
- bit_width: 32,
- is_signed: true
- })
+ Some(&LogicalType::integer(32, true))
);
assert_eq!(basic_info.converted_type(), ConvertedType::INT_32);
assert_eq!(basic_info.id(), 0);
@@ -1482,10 +1476,7 @@ mod tests {
// Test illegal inputs with logical type
result = Type::primitive_type_builder("foo", PhysicalType::INT64)
.with_repetition(Repetition::REPEATED)
- .with_logical_type(Some(LogicalType::Integer {
- is_signed: true,
- bit_width: 8,
- }))
+ .with_logical_type(Some(LogicalType::integer(8, true)))
.build();
assert!(result.is_err());
if let Err(e) = result {
@@ -1524,10 +1515,7 @@ mod tests {
result = Type::primitive_type_builder("foo", PhysicalType::BYTE_ARRAY)
.with_repetition(Repetition::REQUIRED)
- .with_logical_type(Some(LogicalType::Decimal {
- scale: 32,
- precision: 12,
- }))
+ .with_logical_type(Some(LogicalType::decimal(32, 12)))
.with_precision(-1)
.with_scale(-1)
.build();
diff --git a/parquet/src/variant.rs b/parquet/src/variant.rs
index cdbdd84968..55df086736 100644
--- a/parquet/src/variant.rs
+++ b/parquet/src/variant.rs
@@ -199,9 +199,7 @@ mod tests {
// data should have been written with the Variant logical type
assert_eq!(
field.get_basic_info().logical_type_ref(),
- Some(&crate::basic::LogicalType::Variant {
- specification_version: None
- })
+ Some(&crate::basic::LogicalType::variant(None))
);
}
diff --git a/parquet_derive/src/parquet_field.rs
b/parquet_derive/src/parquet_field.rs
index 7473f23055..17b8d85437 100644
--- a/parquet_derive/src/parquet_field.rs
+++ b/parquet_derive/src/parquet_field.rs
@@ -693,42 +693,18 @@ impl Type {
match last_part.trim() {
"bool" => quote! { None },
- "u8" => quote! { Some(LogicalType::Integer {
- bit_width: 8,
- is_signed: false,
- }) },
- "u16" => quote! { Some(LogicalType::Integer {
- bit_width: 16,
- is_signed: false,
- }) },
- "u32" => quote! { Some(LogicalType::Integer {
- bit_width: 32,
- is_signed: false,
- }) },
- "u64" => quote! { Some(LogicalType::Integer {
- bit_width: 64,
- is_signed: false,
- }) },
- "i8" => quote! { Some(LogicalType::Integer {
- bit_width: 8,
- is_signed: true,
- }) },
- "i16" => quote! { Some(LogicalType::Integer {
- bit_width: 16,
- is_signed: true,
- }) },
+ "u8" => quote! { Some(LogicalType::integer(8, false)) },
+ "u16" => quote! { Some(LogicalType::integer(16, false)) },
+ "u32" => quote! { Some(LogicalType::integer(32, false)) },
+ "u64" => quote! { Some(LogicalType::integer(64, false)) },
+ "i8" => quote! { Some(LogicalType::integer(8, true)) },
+ "i16" => quote! { Some(LogicalType::integer(16, true)) },
"i32" | "i64" => quote! { None },
"usize" => {
- quote! { Some(LogicalType::Integer {
- bit_width: usize::BITS as i8,
- is_signed: false
- }) }
+ quote! { Some(LogicalType::integer(usize::BITS as i8, false)) }
}
"isize" => {
- quote! { Some(LogicalType::Integer {
- bit_width: usize::BITS as i8,
- is_signed: true
- }) }
+ quote! { Some(LogicalType::integer(usize::BITS as i8, true)) }
}
"NaiveDate" => quote! { Some(LogicalType::Date) },
"NaiveDateTime" => quote! { None },