This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new debc5bf85 [Parquet] Add projection utility functions (#6931)
debc5bf85 is described below
commit debc5bf85729f7ebeb3789c6094a784abca95f73
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Sat Jan 4 05:23:12 2025 -0500
[Parquet] Add projection utility functions (#6931)
* projection utilities
* improve docs
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
parquet/src/arrow/mod.rs | 101 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 101 insertions(+)
diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index 1305bbac8..35f5897c1 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -281,6 +281,45 @@ impl ProjectionMask {
pub fn leaf_included(&self, leaf_idx: usize) -> bool {
// With no explicit mask (`None`), every leaf index is reported as included.
self.mask.as_ref().map(|m| m[leaf_idx]).unwrap_or(true)
}
+
+ /// Merges `other` into `self`, keeping every leaf selected by either mask.
+ ///
+ /// If either operand has no explicit mask (`None`), the result is `None`;
+ /// otherwise the two masks are OR-ed element by element.
+ ///
+ /// Example:
+ /// ```text
+ /// mask1 = [true, false, true]
+ /// mask2 = [false, true, true]
+ /// union(mask1, mask2) = [true, true, true]
+ /// ```
+ pub fn union(&mut self, other: &Self) {
+     let merged = match (&self.mask, &other.mask) {
+         (Some(lhs), Some(rhs)) => {
+             debug_assert_eq!(lhs.len(), rhs.len());
+             Some(lhs.iter().zip(rhs).map(|(&l, &r)| l || r).collect())
+         }
+         // At least one side carries no explicit mask.
+         _ => None,
+     };
+     self.mask = merged;
+ }
+
+ /// Narrows `self` to the leaves selected by both `self` and `other`.
+ ///
+ /// If `other` has no explicit mask (`None`), `self` is left untouched; if
+ /// only `self` has none, it takes a copy of `other`'s mask. Otherwise the
+ /// two masks are AND-ed element by element.
+ ///
+ /// Example:
+ /// ```text
+ /// mask1 = [true, false, true]
+ /// mask2 = [false, true, true]
+ /// intersect(mask1, mask2) = [false, false, true]
+ /// ```
+ pub fn intersect(&mut self, other: &Self) {
+     // Nothing to narrow by when `other` carries no explicit mask.
+     let rhs = match other.mask.as_ref() {
+         Some(rhs) => rhs,
+         None => return,
+     };
+     let narrowed = match self.mask.as_ref() {
+         None => rhs.clone(),
+         Some(lhs) => {
+             debug_assert_eq!(lhs.len(), rhs.len());
+             lhs.iter().zip(rhs).map(|(&l, &r)| l && r).collect()
+         }
+     };
+     self.mask = Some(narrowed);
+ }
}
/// Looks up the parquet column by name
@@ -551,4 +590,66 @@ mod test {
let mask = ProjectionMask::columns(&schema, ["a", "e"]);
assert_eq!(mask.mask.unwrap(), [true, false, true, false, true]);
}
+
+ #[test]
+ fn test_projection_mask_union() {
+     // Each case: (left mask, right mask, left mask expected after the union).
+     let cases = vec![
+         (
+             Some(vec![true, false, true]),
+             Some(vec![false, true, true]),
+             Some(vec![true, true, true]),
+         ),
+         (None, Some(vec![false, true, true]), None),
+         (Some(vec![true, false, true]), None, None),
+         (None, None, None),
+     ];
+
+     for (lhs, rhs, expected) in cases {
+         let mut merged = ProjectionMask { mask: lhs };
+         let operand = ProjectionMask { mask: rhs };
+         merged.union(&operand);
+         assert_eq!(merged.mask, expected);
+     }
+ }
+
+ #[test]
+ fn test_projection_mask_intersect() {
+     // Each case: (left mask, right mask, left mask expected after the intersection).
+     let cases = vec![
+         (
+             Some(vec![true, false, true]),
+             Some(vec![false, true, true]),
+             Some(vec![false, false, true]),
+         ),
+         (
+             None,
+             Some(vec![false, true, true]),
+             Some(vec![false, true, true]),
+         ),
+         (
+             Some(vec![true, false, true]),
+             None,
+             Some(vec![true, false, true]),
+         ),
+         (None, None, None),
+     ];
+
+     for (lhs, rhs, expected) in cases {
+         let mut narrowed = ProjectionMask { mask: lhs };
+         let operand = ProjectionMask { mask: rhs };
+         narrowed.intersect(&operand);
+         assert_eq!(narrowed.mask, expected);
+     }
+ }
}