alamb commented on code in PR #8629:
URL: https://github.com/apache/arrow-datafusion/pull/8629#discussion_r1435239529


##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -1189,6 +1173,42 @@ pub fn build_join_schema(
     schema.with_functional_dependencies(func_dependencies)
 }
 
+/// Add additional "synthetic" group by expressions based on functional
+/// dependencies.
+///
+/// For example, if we are grouping on `[c1]`, and we know from
+/// functional dependencies that column `c1` determines `c2`, this function
+/// adds `c2` to the group by list.
+///
+/// This allows MySQL style selects like
+/// `SELECT col FROM t WHERE pk = 5` if col is unique
+fn add_group_by_exprs_from_dependencies(

Review Comment:
   I pulled the logic into its own function so I could document it better



##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -1189,6 +1173,42 @@ pub fn build_join_schema(
     schema.with_functional_dependencies(func_dependencies)
 }
 
+/// Add additional "synthetic" group by expressions based on functional
+/// dependencies.
+///
+/// For example, if we are grouping on `[c1]`, and we know from
+/// functional dependencies that column `c1` determines `c2`, this function
+/// adds `c2` to the group by list.
+///
+/// This allows MySQL style selects like
+/// `SELECT col FROM t WHERE pk = 5` if col is unique
+fn add_group_by_exprs_from_dependencies(
+    mut group_expr: Vec<Expr>,
+    schema: &DFSchemaRef,
+) -> Result<Vec<Expr>> {
+    // Names of the fields produced by the GROUP BY exprs for example, `GROUP 
BY
+    // c1 + 1` produces an output field named `"c1 + 1"`
+    let mut group_by_field_names = group_expr
+        .iter()
+        .map(|e| e.display_name())
+        .collect::<Result<Vec<_>>>()?;
+
+    if let Some(target_indices) =
+        get_target_functional_dependencies(schema, &group_by_field_names)
+    {
+        for idx in target_indices {
+            let field = schema.field(idx);
+            let expr =
+                Expr::Column(Column::new(field.qualifier().cloned(), 
field.name()));
+            let expr_name = expr.display_name()?;
+            if !group_by_field_names.contains(&expr_name) {

Review Comment:
   this is the change -- rather than comparing the `Expr`s it compares their 
`display_name` (the fields they will create)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to