adriangb commented on code in PR #18176:
URL: https://github.com/apache/datafusion/pull/18176#discussion_r2445347872


##########
datafusion/physical-plan/src/projection.rs:
##########
@@ -232,6 +226,177 @@ impl From<(Arc<dyn PhysicalExpr>, String)> for 
ProjectionExpr {
     }
 }
 
+impl From<&(Arc<dyn PhysicalExpr>, String)> for ProjectionExpr {
+    fn from(value: &(Arc<dyn PhysicalExpr>, String)) -> Self {
+        Self::new(Arc::clone(&value.0), value.1.clone())
+    }
+}
+
+impl From<ProjectionExpr> for (Arc<dyn PhysicalExpr>, String) {
+    fn from(value: ProjectionExpr) -> Self {
+        (value.expr, value.alias)
+    }
+}
+
+/// A collection of projection expressions.
+///
+/// This struct encapsulates multiple `ProjectionExpr` instances,
+/// representing a complete projection operation and provides
+/// methods to manipulate and analyze the projection as a whole.
+#[derive(Debug, Clone)]
+pub struct Projection {
+    exprs: Vec<ProjectionExpr>,
+}
+
+impl From<Vec<ProjectionExpr>> for Projection {
+    fn from(value: Vec<ProjectionExpr>) -> Self {
+        Self { exprs: value }
+    }
+}
+
+impl From<&[ProjectionExpr]> for Projection {
+    fn from(value: &[ProjectionExpr]) -> Self {
+        Self {
+            exprs: value.to_vec(),
+        }
+    }
+}
+
+impl AsRef<[ProjectionExpr]> for Projection {
+    fn as_ref(&self) -> &[ProjectionExpr] {
+        &self.exprs
+    }
+}
+
+impl Projection {
+    pub fn new(exprs: Vec<ProjectionExpr>) -> Self {
+        Self { exprs }
+    }
+
+    /// Apply another projection on top of this projection, returning the 
combined projection.
+    /// For example, if this projection is `SELECT c@2 AS x, b@1 AS y, a@0 as 
z` and the other projection is `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`,
+    /// we return a projection equivalent to `SELECT c@2 + 1 AS c1, b@1 + a@0 
as c2`.
+    pub fn try_merge(&self, other: &Projection) -> Result<Projection> {
+        let mut new_exprs = Vec::with_capacity(other.exprs.len());
+        for proj_expr in &other.exprs {
+            let new_expr = update_expr(&proj_expr.expr, &self.exprs, true)?
+                .ok_or_else(|| {
+                    internal_datafusion_err!(
+                        "Failed to combine projections: expression {} could 
not be updated",
+                        proj_expr.expr
+                    )
+                })?;
+            new_exprs.push(ProjectionExpr {
+                expr: new_expr,
+                alias: proj_expr.alias.clone(),
+            });
+        }
+        Ok(Projection::new(new_exprs))
+    }
+
+    /// Merge an iterator of projections into a single projection.
+    /// For example, if the projections are:
+    /// 1. `SELECT c@2 AS x, b@1 AS y, a@0 as z`
+    /// 2. `SELECT x@0 + 1 AS c1, y@1 + z@2 as c2`
+    /// 3. `SELECT c1@0 * 2 AS final_c1`
+    /// we return a projection equivalent to `SELECT (c@2 + 1) * 2 AS 
final_c1`.
+    pub fn try_merge_iter<I>(projections: I) -> Result<Projection>
+    where
+        I: IntoIterator<Item = Projection>,
+    {
+        let mut iter = projections.into_iter();
+        let first = iter
+            .next()
+            .ok_or_else(|| internal_datafusion_err!("No projections to 
merge"))?;
+        iter.try_fold(first, |acc, proj| acc.try_merge(&proj))
+    }
+
+    /// Extract the column indices used in this projection.
+    /// For example, for a projection `SELECT a AS x, b + 1 AS y`, where `a` 
is at index 0 and `b` is at index 1,
+    /// this function would return `[0, 1]`.
+    /// Repeated indices are returned only once, and the order is ascending.
+    pub fn column_indices(&self) -> Vec<usize> {
+        self.exprs
+            .iter()
+            .map(|e| collect_columns(&e.expr).into_iter().map(|col| 
col.index()))
+            .flatten()
+            .sorted_unstable()
+            .dedup()
+            .collect_vec()

Review Comment:
   > Consider using .unique() from itertools (already imported) which handles 
both deduplication and sorting in a single pass
   
   Not true, it does *not* handle sorting. I think this is fine.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to