LiaCastaneda commented on code in PR #17478:
URL: https://github.com/apache/datafusion/pull/17478#discussion_r2336394314


##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -1517,39 +1518,66 @@ impl ValuesFields {
     }
 }
 
-// `name_map` tracks a mapping between a field name and the number of 
appearances of that field.
-//
-// Some field names might already come to this function with the count (number 
of times it appeared)
-// as a suffix e.g. id:1, so there's still a chance of name collisions, for 
example,
-// if these three fields passed to this function: "col:1", "col" and "col", 
the function
-// would rename them to -> col:1, col, col:1 causing a posteriror error when 
building the DFSchema.
-// that's why we need the `seen` set, so the fields are always unique.
-//
-pub fn change_redundant_column(fields: &Fields) -> Vec<Field> {
-    let mut name_map = HashMap::new();
-    let mut seen: HashSet<String> = HashSet::new();
+pub fn maybe_project_redundant_column(
+    input: Arc<LogicalPlan>,
+) -> Result<Arc<LogicalPlan>> {
+    // tracks a mapping between a field name and the number of appearances of 
that field.
+    let mut name_map = HashMap::<&str, usize>::new();
+    // tracks all the fields and aliases that were previously seen.
+    let mut seen = HashSet::<Cow<String>>::new();
+
+    // Some field names might already come to this function with the count 
(number of times it appeared)
+    // as a suffix e.g. id:1, so there's still a chance of name collisions, 
for example,
+    // if these three fields passed to this function: "col:1", "col" and 
"col", the function
+    // would rename them to -> col:1, col, col:1 causing a posteriror error 
when building the DFSchema.
+    // That's why we need the `seen` set, so the fields are always unique.
+
+    let aliases = input
+        .schema()
+        .iter()
+        .map(|(_, field)| {
+            let original_name = field.name();
+            let mut name = Cow::Borrowed(original_name);
 
-    fields
-        .into_iter()
-        .map(|field| {
-            let base_name = field.name();
-            let count = name_map.entry(base_name.clone()).or_insert(0);
-            let mut new_name = base_name.clone();
+            let count = name_map.entry(original_name).or_insert(0);
 
             // Loop until we find a name that hasn't been used
-            while seen.contains(&new_name) {
+            while seen.contains(&name) {
                 *count += 1;
-                new_name = format!("{base_name}:{count}");
+                name = Cow::Owned(format!("{original_name}:{count}"));
             }
 
-            seen.insert(new_name.clone());
+            seen.insert(name.clone());
 
-            let mut modified_field =
-                Field::new(&new_name, field.data_type().clone(), 
field.is_nullable());
-            modified_field.set_metadata(field.metadata().clone());
-            modified_field
+            match name {
+                Cow::Borrowed(_) => None,
+                Cow::Owned(alias) => Some(alias),
+            }
         })
-        .collect()
+        .collect::<Vec<_>>();
+
+    // Check if there is at least an alias
+    let is_projection_needed = aliases.iter().any(Option::is_some);

Review Comment:
   very nit: maybe we can keep a flag inside the iteration above? so we avoid 
this one



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to