This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 8cc92a921f refactor: Reduce string allocations in Expr::display_name 
(use write instead of format!) (#10454)
8cc92a921f is described below

commit 8cc92a921f3b0296caf035edb66a4b3c422f084d
Author: Adam Curtis <[email protected]>
AuthorDate: Sun May 12 06:03:01 2024 -0400

    refactor: Reduce string allocations in Expr::display_name (use write 
instead of format!) (#10454)
    
    * refactor: use Write instead of format! to implement display_name
    
    * Use static dispatch for write
    
    * remove more allocations
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/expr/src/expr.rs                        | 309 +++++++++++----------
 datafusion/optimizer/src/analyzer/type_coercion.rs |   4 +-
 2 files changed, 170 insertions(+), 143 deletions(-)

diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 84e4cb6435..f0f41a4c55 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -1650,28 +1650,42 @@ fn fmt_function(
     write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
 }
 
-fn create_function_name(fun: &str, distinct: bool, args: &[Expr]) -> 
Result<String> {
-    let names: Vec<String> = 
args.iter().map(create_name).collect::<Result<_>>()?;
-    let distinct_str = match distinct {
-        true => "DISTINCT ",
-        false => "",
-    };
-    Ok(format!("{}({}{})", fun, distinct_str, names.join(",")))
+fn write_function_name<W: Write>(
+    w: &mut W,
+    fun: &str,
+    distinct: bool,
+    args: &[Expr],
+) -> Result<()> {
+    write!(w, "{}(", fun)?;
+    if distinct {
+        w.write_str("DISTINCT ")?;
+    }
+    write_names_join(w, args, ",")?;
+    w.write_str(")")?;
+    Ok(())
 }
 
 /// Returns a readable name of an expression based on the input schema.
 /// This function recursively transverses the expression for names such as 
"CAST(a > 2)".
 pub(crate) fn create_name(e: &Expr) -> Result<String> {
+    let mut s = String::new();
+    write_name(&mut s, e)?;
+    Ok(s)
+}
+
+fn write_name<W: Write>(w: &mut W, e: &Expr) -> Result<()> {
     match e {
-        Expr::Alias(Alias { name, .. }) => Ok(name.clone()),
-        Expr::Column(c) => Ok(c.flat_name()),
-        Expr::OuterReferenceColumn(_, c) => Ok(format!("outer_ref({})", 
c.flat_name())),
-        Expr::ScalarVariable(_, variable_names) => 
Ok(variable_names.join(".")),
-        Expr::Literal(value) => Ok(format!("{value:?}")),
+        Expr::Alias(Alias { name, .. }) => write!(w, "{}", name)?,
+        Expr::Column(c) => write!(w, "{}", c.flat_name())?,
+        Expr::OuterReferenceColumn(_, c) => write!(w, "outer_ref({})", 
c.flat_name())?,
+        Expr::ScalarVariable(_, variable_names) => {
+            write!(w, "{}", variable_names.join("."))?
+        }
+        Expr::Literal(value) => write!(w, "{value:?}")?,
         Expr::BinaryExpr(binary_expr) => {
-            let left = create_name(binary_expr.left.as_ref())?;
-            let right = create_name(binary_expr.right.as_ref())?;
-            Ok(format!("{} {} {}", left, binary_expr.op, right))
+            write_name(w, binary_expr.left.as_ref())?;
+            write!(w, " {} ", binary_expr.op)?;
+            write_name(w, binary_expr.right.as_ref())?;
         }
         Expr::Like(Like {
             negated,
@@ -1680,19 +1694,17 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
             escape_char,
             case_insensitive,
         }) => {
-            let s = format!(
-                "{} {}{} {} {}",
+            write!(
+                w,
+                "{} {}{} {}",
                 expr,
                 if *negated { "NOT " } else { "" },
                 if *case_insensitive { "ILIKE" } else { "LIKE" },
                 pattern,
-                if let Some(char) = escape_char {
-                    format!("CHAR '{char}'")
-                } else {
-                    "".to_string()
-                }
-            );
-            Ok(s)
+            )?;
+            if let Some(char) = escape_char {
+                write!(w, " CHAR '{char}'")?;
+            }
         }
         Expr::SimilarTo(Like {
             negated,
@@ -1701,8 +1713,9 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
             escape_char,
             case_insensitive: _,
         }) => {
-            let s = format!(
-                "{} {} {} {}",
+            write!(
+                w,
+                "{} {} {}",
                 expr,
                 if *negated {
                     "NOT SIMILAR TO"
@@ -1710,114 +1723,119 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> 
{
                     "SIMILAR TO"
                 },
                 pattern,
-                if let Some(char) = escape_char {
-                    format!("CHAR '{char}'")
-                } else {
-                    "".to_string()
-                }
-            );
-            Ok(s)
+            )?;
+            if let Some(char) = escape_char {
+                write!(w, " CHAR '{char}'")?;
+            }
         }
         Expr::Case(case) => {
-            let mut name = "CASE ".to_string();
+            write!(w, "CASE ")?;
             if let Some(e) = &case.expr {
-                let e = create_name(e)?;
-                let _ = write!(name, "{e} ");
+                write_name(w, e)?;
+                w.write_str(" ")?;
             }
-            for (w, t) in &case.when_then_expr {
-                let when = create_name(w)?;
-                let then = create_name(t)?;
-                let _ = write!(name, "WHEN {when} THEN {then} ");
+            for (when, then) in &case.when_then_expr {
+                w.write_str("WHEN ")?;
+                write_name(w, when)?;
+                w.write_str(" THEN ")?;
+                write_name(w, then)?;
+                w.write_str(" ")?;
             }
             if let Some(e) = &case.else_expr {
-                let e = create_name(e)?;
-                let _ = write!(name, "ELSE {e} ");
+                w.write_str("ELSE ")?;
+                write_name(w, e)?;
+                w.write_str(" ")?;
             }
-            name += "END";
-            Ok(name)
+            w.write_str("END")?;
         }
         Expr::Cast(Cast { expr, .. }) => {
             // CAST does not change the expression name
-            create_name(expr)
+            write_name(w, expr)?;
         }
         Expr::TryCast(TryCast { expr, .. }) => {
             // CAST does not change the expression name
-            create_name(expr)
+            write_name(w, expr)?;
         }
         Expr::Not(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("NOT {expr}"))
+            w.write_str("NOT ")?;
+            write_name(w, expr)?;
         }
         Expr::Negative(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("(- {expr})"))
+            w.write_str("(- ")?;
+            write_name(w, expr)?;
+            w.write_str(")")?;
         }
         Expr::IsNull(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("{expr} IS NULL"))
+            write_name(w, expr)?;
+            w.write_str(" IS NULL")?;
         }
         Expr::IsNotNull(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("{expr} IS NOT NULL"))
+            write_name(w, expr)?;
+            w.write_str(" IS NOT NULL")?;
         }
         Expr::IsTrue(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("{expr} IS TRUE"))
+            write_name(w, expr)?;
+            w.write_str(" IS TRUE")?;
         }
         Expr::IsFalse(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("{expr} IS FALSE"))
+            write_name(w, expr)?;
+            w.write_str(" IS FALSE")?;
         }
         Expr::IsUnknown(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("{expr} IS UNKNOWN"))
+            write_name(w, expr)?;
+            w.write_str(" IS UNKNOWN")?;
         }
         Expr::IsNotTrue(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("{expr} IS NOT TRUE"))
+            write_name(w, expr)?;
+            w.write_str(" IS NOT TRUE")?;
         }
         Expr::IsNotFalse(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("{expr} IS NOT FALSE"))
+            write_name(w, expr)?;
+            w.write_str(" IS NOT FALSE")?;
         }
         Expr::IsNotUnknown(expr) => {
-            let expr = create_name(expr)?;
-            Ok(format!("{expr} IS NOT UNKNOWN"))
+            write_name(w, expr)?;
+            w.write_str(" IS NOT UNKNOWN")?;
         }
-        Expr::Exists(Exists { negated: true, .. }) => Ok("NOT 
EXISTS".to_string()),
-        Expr::Exists(Exists { negated: false, .. }) => 
Ok("EXISTS".to_string()),
-        Expr::InSubquery(InSubquery { negated: true, .. }) => Ok("NOT 
IN".to_string()),
-        Expr::InSubquery(InSubquery { negated: false, .. }) => 
Ok("IN".to_string()),
+        Expr::Exists(Exists { negated: true, .. }) => w.write_str("NOT 
EXISTS")?,
+        Expr::Exists(Exists { negated: false, .. }) => w.write_str("EXISTS")?,
+        Expr::InSubquery(InSubquery { negated: true, .. }) => w.write_str("NOT 
IN")?,
+        Expr::InSubquery(InSubquery { negated: false, .. }) => 
w.write_str("IN")?,
         Expr::ScalarSubquery(subquery) => {
-            Ok(subquery.subquery.schema().field(0).name().clone())
+            w.write_str(subquery.subquery.schema().field(0).name().as_str())?;
         }
         Expr::GetIndexedField(GetIndexedField { expr, field }) => {
-            let expr = create_name(expr)?;
+            write_name(w, expr)?;
             match field {
-                GetFieldAccess::NamedStructField { name } => {
-                    Ok(format!("{expr}[{name}]"))
-                }
+                GetFieldAccess::NamedStructField { name } => write!(w, 
"[{name}]")?,
                 GetFieldAccess::ListIndex { key } => {
-                    let key = create_name(key)?;
-                    Ok(format!("{expr}[{key}]"))
+                    w.write_str("[")?;
+                    write_name(w, key)?;
+                    w.write_str("]")?;
                 }
                 GetFieldAccess::ListRange {
                     start,
                     stop,
                     stride,
                 } => {
-                    let start = create_name(start)?;
-                    let stop = create_name(stop)?;
-                    let stride = create_name(stride)?;
-                    Ok(format!("{expr}[{start}:{stop}:{stride}]"))
+                    w.write_str("[")?;
+                    write_name(w, start)?;
+                    w.write_str(":")?;
+                    write_name(w, stop)?;
+                    w.write_str(":")?;
+                    write_name(w, stride)?;
+                    w.write_str("]")?;
                 }
             }
         }
         Expr::Unnest(Unnest { expr }) => {
-            let expr_name = create_name(expr)?;
-            Ok(format!("unnest({expr_name})"))
+            w.write_str("unnest(")?;
+            write_name(w, expr)?;
+            w.write_str(")")?;
+        }
+        Expr::ScalarFunction(fun) => {
+            w.write_str(fun.func.display_name(&fun.args)?.as_str())?;
         }
-        Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args),
         Expr::WindowFunction(WindowFunction {
             fun,
             args,
@@ -1826,23 +1844,21 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
             order_by,
             null_treatment,
         }) => {
-            let mut parts: Vec<String> =
-                vec![create_function_name(&fun.to_string(), false, args)?];
-
+            write_function_name(w, &fun.to_string(), false, args)?;
             if let Some(nt) = null_treatment {
-                parts.push(format!("{}", nt));
+                w.write_str(" ")?;
+                write!(w, "{}", nt)?;
             }
-
             if !partition_by.is_empty() {
-                parts.push(format!("PARTITION BY [{}]", 
expr_vec_fmt!(partition_by)));
+                w.write_str(" ")?;
+                write!(w, "PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
             }
-
             if !order_by.is_empty() {
-                parts.push(format!("ORDER BY [{}]", expr_vec_fmt!(order_by)));
+                w.write_str(" ")?;
+                write!(w, "ORDER BY [{}]", expr_vec_fmt!(order_by))?;
             }
-
-            parts.push(format!("{window_frame}"));
-            Ok(parts.join(" "))
+            w.write_str(" ")?;
+            write!(w, "{window_frame}")?;
         }
         Expr::AggregateFunction(AggregateFunction {
             func_def,
@@ -1852,48 +1868,48 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
             order_by,
             null_treatment,
         }) => {
-            let name = match func_def {
+            match func_def {
                 AggregateFunctionDefinition::BuiltIn(..) => {
-                    create_function_name(func_def.name(), *distinct, args)?
+                    write_function_name(w, func_def.name(), *distinct, args)?;
                 }
-                AggregateFunctionDefinition::UDF(..) => {
-                    let names: Vec<String> =
-                        args.iter().map(create_name).collect::<Result<_>>()?;
-                    names.join(",")
+                AggregateFunctionDefinition::UDF(fun) => {
+                    write!(w, "{}(", fun.name())?;
+                    write_names_join(w, args, ",")?;
+                    write!(w, ")")?;
                 }
             };
-            let mut info = String::new();
             if let Some(fe) = filter {
-                info += &format!(" FILTER (WHERE {fe})");
+                write!(w, " FILTER (WHERE {fe})")?;
             };
             if let Some(order_by) = order_by {
-                info += &format!(" ORDER BY [{}]", expr_vec_fmt!(order_by));
+                write!(w, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
             };
             if let Some(nt) = null_treatment {
-                info += &format!(" {}", nt);
-            }
-            match func_def {
-                AggregateFunctionDefinition::BuiltIn(..) => {
-                    Ok(format!("{}{}", name, info))
-                }
-                AggregateFunctionDefinition::UDF(fun) => {
-                    Ok(format!("{}({}){}", fun.name(), name, info))
-                }
+                write!(w, " {}", nt)?;
             }
         }
         Expr::GroupingSet(grouping_set) => match grouping_set {
             GroupingSet::Rollup(exprs) => {
-                Ok(format!("ROLLUP ({})", create_names(exprs.as_slice())?))
+                write!(w, "ROLLUP (")?;
+                write_names(w, exprs.as_slice())?;
+                write!(w, ")")?;
             }
             GroupingSet::Cube(exprs) => {
-                Ok(format!("CUBE ({})", create_names(exprs.as_slice())?))
+                write!(w, "CUBE (")?;
+                write_names(w, exprs.as_slice())?;
+                write!(w, ")")?;
             }
             GroupingSet::GroupingSets(lists_of_exprs) => {
-                let mut list_of_names = vec![];
-                for exprs in lists_of_exprs {
-                    list_of_names.push(format!("({})", 
create_names(exprs.as_slice())?));
+                write!(w, "GROUPING SETS (")?;
+                for (i, exprs) in lists_of_exprs.iter().enumerate() {
+                    if i != 0 {
+                        write!(w, ", ")?;
+                    }
+                    write!(w, "(")?;
+                    write_names(w, exprs.as_slice())?;
+                    write!(w, ")")?;
                 }
-                Ok(format!("GROUPING SETS ({})", list_of_names.join(", ")))
+                write!(w, ")")?;
             }
         },
         Expr::InList(InList {
@@ -1901,12 +1917,12 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
             list,
             negated,
         }) => {
-            let expr = create_name(expr)?;
+            write_name(w, expr)?;
             let list = list.iter().map(create_name);
             if *negated {
-                Ok(format!("{expr} NOT IN ({list:?})"))
+                write!(w, " NOT IN ({list:?})")?;
             } else {
-                Ok(format!("{expr} IN ({list:?})"))
+                write!(w, " IN ({list:?})")?;
             }
         }
         Expr::Between(Between {
@@ -1915,35 +1931,46 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
             low,
             high,
         }) => {
-            let expr = create_name(expr)?;
-            let low = create_name(low)?;
-            let high = create_name(high)?;
+            write_name(w, expr)?;
             if *negated {
-                Ok(format!("{expr} NOT BETWEEN {low} AND {high}"))
+                write!(w, " NOT BETWEEN ")?;
             } else {
-                Ok(format!("{expr} BETWEEN {low} AND {high}"))
+                write!(w, " BETWEEN ")?;
             }
+            write_name(w, low)?;
+            write!(w, " AND ")?;
+            write_name(w, high)?;
         }
         Expr::Sort { .. } => {
-            internal_err!("Create name does not support sort expression")
+            return internal_err!("Create name does not support sort 
expression")
         }
         Expr::Wildcard { qualifier } => match qualifier {
-            Some(qualifier) => internal_err!(
-                "Create name does not support qualified wildcard, got 
{qualifier}"
-            ),
-            None => Ok("*".to_string()),
+            Some(qualifier) => {
+                return internal_err!(
+                    "Create name does not support qualified wildcard, got 
{qualifier}"
+                )
+            }
+            None => write!(w, "*")?,
         },
-        Expr::Placeholder(Placeholder { id, .. }) => Ok((*id).to_string()),
-    }
+        Expr::Placeholder(Placeholder { id, .. }) => write!(w, "{}", id)?,
+    };
+    Ok(())
 }
 
-/// Create a comma separated list of names from a list of expressions
-fn create_names(exprs: &[Expr]) -> Result<String> {
-    Ok(exprs
-        .iter()
-        .map(create_name)
-        .collect::<Result<Vec<String>>>()?
-        .join(", "))
+fn write_names<W: Write>(w: &mut W, exprs: &[Expr]) -> Result<()> {
+    exprs.iter().try_for_each(|e| write_name(w, e))
+}
+
+fn write_names_join<W: Write>(w: &mut W, exprs: &[Expr], sep: &str) -> 
Result<()> {
+    let mut iter = exprs.iter();
+    if let Some(first_arg) = iter.next() {
+        write_name(w, first_arg)?;
+    }
+    for a in iter {
+        w.write_str(sep)?;
+        write_name(w, a)?;
+    }
+    Ok(())
 }
 
 #[cfg(test)]
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs 
b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 994adf7327..60b81aff9a 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -1155,7 +1155,7 @@ mod test {
         let like_expr = Expr::Like(Like::new(false, expr, pattern, None, 
false));
         let empty = empty_with_type(DataType::Utf8);
         let plan = 
LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty)?);
-        let expected = "Projection: a LIKE CAST(NULL AS Utf8) AS a LIKE NULL \
+        let expected = "Projection: a LIKE CAST(NULL AS Utf8) AS a LIKE NULL\
              \n  EmptyRelation";
         assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, 
expected)?;
 
@@ -1184,7 +1184,7 @@ mod test {
         let ilike_expr = Expr::Like(Like::new(false, expr, pattern, None, 
true));
         let empty = empty_with_type(DataType::Utf8);
         let plan = 
LogicalPlan::Projection(Projection::try_new(vec![ilike_expr], empty)?);
-        let expected = "Projection: a ILIKE CAST(NULL AS Utf8) AS a ILIKE NULL 
\
+        let expected = "Projection: a ILIKE CAST(NULL AS Utf8) AS a ILIKE NULL\
              \n  EmptyRelation";
         assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, 
expected)?;
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to