etseidl commented on code in PR #10109:
URL: https://github.com/apache/arrow-rs/pull/10109#discussion_r3391565175


##########
parquet/src/schema/types.rs:
##########
@@ -2505,6 +2541,195 @@ mod tests {
         assert_eq!(result_schema, expected_schema);
     }
 
+    #[test]
+    fn test_file_logical_type_roundtrip() {
+        let path_field = Arc::new(
+            Type::primitive_type_builder("path", PhysicalType::BYTE_ARRAY)
+                .with_repetition(Repetition::REQUIRED)
+                .with_logical_type(Some(LogicalType::String))
+                .build()
+                .unwrap(),
+        );
+        let size_field = Arc::new(
+            Type::primitive_type_builder("size", PhysicalType::INT64)
+                .with_repetition(Repetition::OPTIONAL)
+                .build()
+                .unwrap(),
+        );
+        let offset_field = Arc::new(
+            Type::primitive_type_builder("offset", PhysicalType::INT64)
+                .with_repetition(Repetition::OPTIONAL)
+                .build()
+                .unwrap(),
+        );
+        let etag_field = Arc::new(
+            Type::primitive_type_builder("etag", PhysicalType::BYTE_ARRAY)
+                .with_repetition(Repetition::OPTIONAL)
+                .with_logical_type(Some(LogicalType::String))
+                .build()
+                .unwrap(),
+        );
+        let file_group = Arc::new(
+            Type::group_type_builder("f")
+                .with_repetition(Repetition::REQUIRED)
+                .with_logical_type(Some(LogicalType::File))
+                .with_fields(vec![path_field, size_field, offset_field, 
etag_field])
+                .build()
+                .unwrap(),
+        );
+        let schema = Arc::new(
+            Type::group_type_builder("example")
+                .with_fields(vec![file_group])
+                .build()
+                .unwrap(),
+        );
+        let result = roundtrip_schema(schema.clone()).unwrap();
+        assert_eq!(result, schema);
+        assert_eq!(
+            result
+                .get_fields()[0]
+                .get_basic_info()
+                .logical_type_ref(),
+            Some(&LogicalType::File)
+        );
+    }
+
+    #[test]
+    fn test_file_logical_type_path_only() {
+        let path_field = Arc::new(
+            Type::primitive_type_builder("path", PhysicalType::BYTE_ARRAY)
+                .with_repetition(Repetition::REQUIRED)
+                .with_logical_type(Some(LogicalType::String))
+                .build()
+                .unwrap(),
+        );
+        let result = Type::group_type_builder("file_field")
+            .with_repetition(Repetition::REQUIRED)
+            .with_logical_type(Some(LogicalType::File))
+            .with_fields(vec![path_field])
+            .build();
+        assert!(result.is_ok());
+        let tp = result.unwrap();
+        assert_eq!(
+            tp.get_basic_info().logical_type_ref(),
+            Some(&LogicalType::File)
+        );
+    }
+
+    #[test]
+    fn test_file_logical_type_all_fields() {
+        let path_field = Arc::new(
+            Type::primitive_type_builder("path", PhysicalType::BYTE_ARRAY)
+                .with_repetition(Repetition::REQUIRED)
+                .with_logical_type(Some(LogicalType::String))
+                .build()
+                .unwrap(),
+        );
+        let size_field = Arc::new(
+            Type::primitive_type_builder("size", PhysicalType::INT64)
+                .with_repetition(Repetition::OPTIONAL)
+                .build()
+                .unwrap(),
+        );
+        let offset_field = Arc::new(
+            Type::primitive_type_builder("offset", PhysicalType::INT64)
+                .with_repetition(Repetition::OPTIONAL)
+                .build()
+                .unwrap(),
+        );
+        let etag_field = Arc::new(
+            Type::primitive_type_builder("etag", PhysicalType::BYTE_ARRAY)
+                .with_repetition(Repetition::OPTIONAL)
+                .with_logical_type(Some(LogicalType::String))
+                .build()
+                .unwrap(),
+        );
+        let result = Type::group_type_builder("file_field")
+            .with_repetition(Repetition::REQUIRED)
+            .with_logical_type(Some(LogicalType::File))
+            .with_fields(vec![path_field, size_field, offset_field, 
etag_field])
+            .build();
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap().get_fields().len(), 4);
+    }
+
+    #[test]
+    fn test_file_logical_type_requires_path_field() {
+        let size_field = Arc::new(
+            Type::primitive_type_builder("size", PhysicalType::INT64)
+                .with_repetition(Repetition::OPTIONAL)
+                .build()
+                .unwrap(),
+        );
+        let result = Type::group_type_builder("missing_path")
+            .with_repetition(Repetition::REQUIRED)
+            .with_logical_type(Some(LogicalType::File))
+            .with_fields(vec![size_field])
+            .build();
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("must contain required field 'path'"));
+    }
+
+    #[test]
+    fn test_file_logical_type_rejects_unrecognized_field() {
+        let path_field = Arc::new(
+            Type::primitive_type_builder("path", PhysicalType::BYTE_ARRAY)
+                .with_repetition(Repetition::REQUIRED)
+                .with_logical_type(Some(LogicalType::String))
+                .build()
+                .unwrap(),
+        );
+        let unknown_field = Arc::new(
+            Type::primitive_type_builder("unknown_field", 
PhysicalType::BYTE_ARRAY)
+                .with_repetition(Repetition::OPTIONAL)
+                .build()
+                .unwrap(),
+        );
+        let result = Type::group_type_builder("bad_file")
+            .with_repetition(Repetition::REQUIRED)
+            .with_logical_type(Some(LogicalType::File))
+            .with_fields(vec![path_field, unknown_field])
+            .build();
+        assert!(result.is_err());
+        assert!(result
+            .unwrap_err()
+            .to_string()
+            .contains("unrecognized field"));

Review Comment:
   Nit: in general I prefer full string compares to `contains`. In this case it 
should at least test for the specific unrecognized field name `'unknown_field'`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to