paleolimbot commented on code in PR #10065:
URL: https://github.com/apache/arrow-rs/pull/10065#discussion_r3359908231
##########
parquet/tests/geospatial.rs:
##########
@@ -425,4 +425,221 @@ mod test {
);
Arc::new(array)
}
+
+ #[test]
+ fn test_logical_type_to_field_conversion() {
+ use parquet::arrow::parquet_to_arrow_schema;
+ use parquet::basic::Type as PhysicalType;
+ use parquet::schema::types::{SchemaDescriptor, Type};
+
+ // Test cases: (LogicalType, expected metadata JSON)
+ let test_cases = [
+ // Geometry with default CRS (defaults to OGC:CRS84 per Parquet
spec)
+ (LogicalType::geometry(None), r#"{"crs":"OGC:CRS84"}"#),
+ // Geometry with srid:0 should result in an unset (omitted) CRS
+ (LogicalType::geometry(Some("srid:0".to_string())), r#"{}"#),
+ // Geometry with custom CRSes (authority:code and partial projjson)
+ (
+ LogicalType::geometry(Some("EPSG:4267".to_string())),
+ r#"{"crs":"EPSG:4267"}"#,
+ ),
+ (
+ LogicalType::geometry(Some(
+ r#"{"id":{"authority":"EPSG","code":4326}}"#.to_string(),
+ )),
+ r#"{"crs":{"id":{"authority":"EPSG","code":4326}}}"#,
+ ),
+ // Geography with default CRS (default OGC:CRS84, spherical edges)
+ (
+ LogicalType::geography(None, None),
+ r#"{"crs":"OGC:CRS84","edges":"spherical"}"#,
+ ),
+ // Geography with explicit edges
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::SPHERICAL)),
+ r#"{"crs":"OGC:CRS84","edges":"spherical"}"#,
+ ),
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::KARNEY)),
+ r#"{"crs":"OGC:CRS84","edges":"karney"}"#,
+ ),
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::VINCENTY)),
+ r#"{"crs":"OGC:CRS84","edges":"vincenty"}"#,
+ ),
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::ANDOYER)),
+ r#"{"crs":"OGC:CRS84","edges":"andoyer"}"#,
+ ),
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::THOMAS)),
+ r#"{"crs":"OGC:CRS84","edges":"thomas"}"#,
+ ),
+ // Geometry with srid:0 should result in an unset (omitted) CRS
+ // and spherical edges
+ (
+ LogicalType::geography(Some("srid:0".to_string()), None),
+ r#"{"edges":"spherical"}"#,
+ ),
+ // Geography with custom CRSes (authority:code and partial
projjson)
+ (
+ LogicalType::geography(Some("EPSG:4267".to_string()), None),
+ r#"{"crs":"EPSG:4267","edges":"spherical"}"#,
+ ),
+ (
+ LogicalType::geography(
+
Some(r#"{"id":{"authority":"EPSG","code":4326}}"#.to_string()),
+ None,
+ ),
+
r#"{"crs":{"id":{"authority":"EPSG","code":4326}},"edges":"spherical"}"#,
+ ),
Review Comment:
These are the test cases for reading Parquet files. For GeoArrow
implementation to correctly recognize the intent of the CRS field in the
Parquet file, this is the GeoArrow extension metadata that must be produced in
each of these cases.
There were a few problems with the existing implementation:
- `LogicalType::geometry(None)` (i.e., Parquet default CRS) was read as as
`{}` (which in GeoArrow land means "I don't know what the CRS is)
- `LogicalType::geography(Some(...), None)` (i.e. Parquet default edge
algorithm) was read as `{"crs":"..."}` (i.e., no edges in the GeoArrow
metadata, which consumers would recognize as GEOMETRY and not GEOGRAPHY)
- `LogicalType::geography(Some(...),
Some(EdgeInterpolationAlgorithm::SPHERICAL))` (i.e. an actual edge algorithm)
was read as `{"crs":"...","algorithm":"spherical"}`. The `"algorithm"` key
isn't valid in GeoArrow and this would be rejected by consumers or ignored and
read as GEOMETRY (not geography).
##########
parquet/tests/geospatial.rs:
##########
@@ -425,4 +425,221 @@ mod test {
);
Arc::new(array)
}
+
+ #[test]
+ fn test_logical_type_to_field_conversion() {
+ use parquet::arrow::parquet_to_arrow_schema;
+ use parquet::basic::Type as PhysicalType;
+ use parquet::schema::types::{SchemaDescriptor, Type};
+
+ // Test cases: (LogicalType, expected metadata JSON)
+ let test_cases = [
+ // Geometry with default CRS (defaults to OGC:CRS84 per Parquet
spec)
+ (LogicalType::geometry(None), r#"{"crs":"OGC:CRS84"}"#),
+ // Geometry with srid:0 should result in an unset (omitted) CRS
+ (LogicalType::geometry(Some("srid:0".to_string())), r#"{}"#),
+ // Geometry with custom CRSes (authority:code and partial projjson)
+ (
+ LogicalType::geometry(Some("EPSG:4267".to_string())),
+ r#"{"crs":"EPSG:4267"}"#,
+ ),
+ (
+ LogicalType::geometry(Some(
+ r#"{"id":{"authority":"EPSG","code":4326}}"#.to_string(),
+ )),
+ r#"{"crs":{"id":{"authority":"EPSG","code":4326}}}"#,
+ ),
+ // Geography with default CRS (default OGC:CRS84, spherical edges)
+ (
+ LogicalType::geography(None, None),
+ r#"{"crs":"OGC:CRS84","edges":"spherical"}"#,
+ ),
+ // Geography with explicit edges
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::SPHERICAL)),
+ r#"{"crs":"OGC:CRS84","edges":"spherical"}"#,
+ ),
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::KARNEY)),
+ r#"{"crs":"OGC:CRS84","edges":"karney"}"#,
+ ),
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::VINCENTY)),
+ r#"{"crs":"OGC:CRS84","edges":"vincenty"}"#,
+ ),
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::ANDOYER)),
+ r#"{"crs":"OGC:CRS84","edges":"andoyer"}"#,
+ ),
+ (
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::THOMAS)),
+ r#"{"crs":"OGC:CRS84","edges":"thomas"}"#,
+ ),
+ // Geometry with srid:0 should result in an unset (omitted) CRS
+ // and spherical edges
+ (
+ LogicalType::geography(Some("srid:0".to_string()), None),
+ r#"{"edges":"spherical"}"#,
+ ),
+ // Geography with custom CRSes (authority:code and partial
projjson)
+ (
+ LogicalType::geography(Some("EPSG:4267".to_string()), None),
+ r#"{"crs":"EPSG:4267","edges":"spherical"}"#,
+ ),
+ (
+ LogicalType::geography(
+
Some(r#"{"id":{"authority":"EPSG","code":4326}}"#.to_string()),
+ None,
+ ),
+
r#"{"crs":{"id":{"authority":"EPSG","code":4326}},"edges":"spherical"}"#,
+ ),
+ ];
+
+ for (logical_type, expected_metadata) in test_cases {
+ // Build a Parquet schema with the given LogicalType
+ let parquet_schema = SchemaDescriptor::new(Arc::new(
+ Type::group_type_builder("schema")
+ .with_fields(vec![Arc::new(
+ Type::primitive_type_builder("geom",
PhysicalType::BYTE_ARRAY)
+ .with_logical_type(Some(logical_type.clone()))
+ .build()
+ .unwrap(),
+ )])
+ .build()
+ .unwrap(),
+ ));
+
+ // Convert to Arrow schema
+ let arrow_schema = parquet_to_arrow_schema(&parquet_schema,
None).unwrap();
+ let field = arrow_schema.field(0);
+
+ // Check extension type name
+ let ext_name = field.metadata().get("ARROW:extension:name");
+ assert_eq!(
+ ext_name,
+ Some(&"geoarrow.wkb".to_string()),
+ "Extension name mismatch for {logical_type:?}"
+ );
+
+ // Check extension metadata
+ let ext_metadata =
field.metadata().get("ARROW:extension:metadata");
+ assert_eq!(
+ ext_metadata,
+ Some(&expected_metadata.to_string()),
+ "Extension metadata mismatch for {logical_type:?}"
+ );
+ }
+ }
+
+ #[test]
+ fn test_field_to_logical_type_conversion() {
+ use std::collections::HashMap;
+
+ // Test cases: (extension metadata JSON, expected LogicalType)
+ let test_cases = [
+ // Geometry with no CRS should be GEOMETRY(srid:0)
+ (r#"{}"#, LogicalType::geometry(Some("srid:0".to_string()))),
+ // Geometry with string CRS
+ (
+ r#"{"crs":"EPSG:4267"}"#,
+ LogicalType::geometry(Some("\"EPSG:4267\"".to_string())),
+ ),
+ // Geometry with PROJJSON CRS
+ (
+ r#"{"crs":{"id":{"authority":"EPSG","code":3857}}}"#,
+ LogicalType::geometry(Some(
+ r#"{"id":{"authority":"EPSG","code":3857}}"#.to_string(),
+ )),
+ ),
+ // Geometry with lon/lat CRSes (canonically removed because
lon/lat is the
+ // default Parquet CRS)
+ (r#"{"crs":"OGC:CRS84"}"#, LogicalType::geometry(None)),
+ (r#"{"crs":"EPSG:4326"}"#, LogicalType::geometry(None)),
+ (
+ r#"{"crs":{"id":{"authority":"EPSG","code":4326}}}"#,
+ LogicalType::geometry(None),
+ ),
+ (
+ r#"{"crs":{"id":{"authority":"EPSG","code":"4326"}}}"#,
+ LogicalType::geometry(None),
+ ),
+ (
+ r#"{"crs":{"id":{"authority":"OGC","code":"CRS84"}}}"#,
+ LogicalType::geometry(None),
+ ),
+ // Geography with no CRS, spherical edges
+ (
+ r#"{"edges":"spherical"}"#,
+ LogicalType::geography(Some("srid:0".to_string()), None),
+ ),
+ // Geography with OGC:CRS84 and spherical edges
+ (
+ r#"{"crs":"OGC:CRS84","edges":"spherical"}"#,
+ LogicalType::geography(None, None),
+ ),
+ // Geography with different edge algorithms
+ (
+ r#"{"crs":"OGC:CRS84","edges":"karney"}"#,
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::KARNEY)),
+ ),
+ (
+ r#"{"crs":"OGC:CRS84","edges":"vincenty"}"#,
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::VINCENTY)),
+ ),
+ (
+ r#"{"crs":"OGC:CRS84","edges":"andoyer"}"#,
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::ANDOYER)),
+ ),
+ (
+ r#"{"crs":"OGC:CRS84","edges":"thomas"}"#,
+ LogicalType::geography(None,
Some(EdgeInterpolationAlgorithm::THOMAS)),
+ ),
+ // Geography with custom CRS and edges
+ (
+ r#"{"crs":"EPSG:4267","edges":"karney"}"#,
+ LogicalType::geography(
+ Some("\"EPSG:4267\"".to_string()),
+ Some(EdgeInterpolationAlgorithm::KARNEY),
+ ),
+ ),
+ // Geography with PROJJSON CRS
+ (
+
r#"{"crs":{"id":{"authority":"EPSG","code":4267}},"edges":"spherical"}"#,
+ LogicalType::geography(
+
Some(r#"{"id":{"authority":"EPSG","code":4267}}"#.to_string()),
+ None,
+ ),
+ ),
Review Comment:
These are the tests for writing. The main issue with writing was that
`"edges":"spherical"`, which GeoArrow uses to communicate GEOGRAPHY, was not
actually writing a geography type because the implementation expected
`"algorithm"`. An unset CRS (geoarrow metadata `{}`) was also written as a
Parquet default CRS which is technically incorrect but also not common.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]