etseidl commented on code in PR #6871:
URL: https://github.com/apache/arrow-rs/pull/6871#discussion_r1887600878


##########
parquet/src/arrow/mod.rs:
##########
@@ -371,4 +442,109 @@ mod test {
             .unwrap();
         Bytes::from(buf)
     }
+
+    #[test]
+    fn test_mask_from_column_names() {
+        let message_type = "
+            message test_schema {
+                OPTIONAL group a (MAP) {
+                    REPEATED group key_value {
+                        REQUIRED BYTE_ARRAY key (UTF8);
+                        OPTIONAL group value (MAP) {
+                            REPEATED group key_value {
+                                REQUIRED INT32 key;
+                                REQUIRED BOOLEAN value;
+                            }
+                        }
+                    }
+                }
+                REQUIRED INT32 b;
+                REQUIRED DOUBLE c;
+            }
+            ";
+        let parquet_group_type = parse_message_type(message_type).unwrap();
+        let schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
+
+        let mask = ProjectionMask::columns(&schema, ["foo", "bar"]);
+        assert_eq!(mask.mask.unwrap(), vec![false; 5]);
+
+        let mask = ProjectionMask::columns(&schema, []);
+        assert_eq!(mask.mask.unwrap(), vec![false; 5]);
+
+        let mask = ProjectionMask::columns(&schema, ["a", "c"]);
+        assert_eq!(mask.mask.unwrap(), [true, true, true, false, true]);
+
+        let mask = ProjectionMask::columns(&schema, ["a.key_value.key", "c"]);
+        assert_eq!(mask.mask.unwrap(), [true, false, false, false, true]);
+
+        let mask = ProjectionMask::columns(&schema, ["a.key_value.value", 
"b"]);
+        assert_eq!(mask.mask.unwrap(), [false, true, true, true, false]);
+
+        let message_type = "
+            message test_schema {
+                OPTIONAL group a (LIST) {
+                    REPEATED group list {
+                        OPTIONAL group element (LIST) {
+                            REPEATED group list {
+                                OPTIONAL group element (LIST) {
+                                    REPEATED group list {
+                                        OPTIONAL BYTE_ARRAY element (UTF8);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                REQUIRED INT32 b;
+            }
+            ";
+        let parquet_group_type = parse_message_type(message_type).unwrap();
+        let schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
+
+        let mask = ProjectionMask::columns(&schema, ["a", "b"]);
+        assert_eq!(mask.mask.unwrap(), [true, true]);
+
+        let mask = ProjectionMask::columns(&schema, ["a.list.element", "b"]);
+        assert_eq!(mask.mask.unwrap(), [true, true]);
+
+        let mask =
+            ProjectionMask::columns(&schema, 
["a.list.element.list.element.list.element", "b"]);
+        assert_eq!(mask.mask.unwrap(), [true, true]);
+
+        let mask = ProjectionMask::columns(&schema, ["b"]);
+        assert_eq!(mask.mask.unwrap(), [false, true]);
+
+        let message_type = "
+            message test_schema {
+                OPTIONAL INT32 a;
+                OPTIONAL INT32 b;
+                OPTIONAL INT32 c;
+                OPTIONAL INT32 d;
+                OPTIONAL INT32 e;
+            }
+            ";
+        let parquet_group_type = parse_message_type(message_type).unwrap();
+        let schema = SchemaDescriptor::new(Arc::new(parquet_group_type));
+
+        let mask = ProjectionMask::columns(&schema, ["a", "b"]);
+        assert_eq!(mask.mask.unwrap(), [true, true, false, false, false]);
+
+        let mask = ProjectionMask::columns(&schema, ["d", "b", "d"]);
+        assert_eq!(mask.mask.unwrap(), [false, true, false, true, false]);
+
+        let message_type = "
+            message test_schema {
+                OPTIONAL INT32 a;
+                OPTIONAL INT32 b;
+                OPTIONAL INT32 a;

Review Comment:
   Yeah, I'm not a fan of this behavior, but I think some query engines (spark 
perhaps) will produce duplicate names when joining tables. Necessary evil I 
guess.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to