This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 8cc92a921f refactor: Reduce string allocations in Expr::display_name
(use write instead of format!) (#10454)
8cc92a921f is described below
commit 8cc92a921f3b0296caf035edb66a4b3c422f084d
Author: Adam Curtis <[email protected]>
AuthorDate: Sun May 12 06:03:01 2024 -0400
refactor: Reduce string allocations in Expr::display_name (use write
instead of format!) (#10454)
* refactor: use Write instead of format! to implement display_name
* Use static dispatch for write
* remove more allocations
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/expr/src/expr.rs | 309 +++++++++++----------
datafusion/optimizer/src/analyzer/type_coercion.rs | 4 +-
2 files changed, 170 insertions(+), 143 deletions(-)
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 84e4cb6435..f0f41a4c55 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -1650,28 +1650,42 @@ fn fmt_function(
write!(f, "{}({}{})", fun, distinct_str, args.join(", "))
}
-fn create_function_name(fun: &str, distinct: bool, args: &[Expr]) ->
Result<String> {
- let names: Vec<String> =
args.iter().map(create_name).collect::<Result<_>>()?;
- let distinct_str = match distinct {
- true => "DISTINCT ",
- false => "",
- };
- Ok(format!("{}({}{})", fun, distinct_str, names.join(",")))
+fn write_function_name<W: Write>(
+ w: &mut W,
+ fun: &str,
+ distinct: bool,
+ args: &[Expr],
+) -> Result<()> {
+ write!(w, "{}(", fun)?;
+ if distinct {
+ w.write_str("DISTINCT ")?;
+ }
+ write_names_join(w, args, ",")?;
+ w.write_str(")")?;
+ Ok(())
}
/// Returns a readable name of an expression based on the input schema.
/// This function recursively transverses the expression for names such as
"CAST(a > 2)".
pub(crate) fn create_name(e: &Expr) -> Result<String> {
+ let mut s = String::new();
+ write_name(&mut s, e)?;
+ Ok(s)
+}
+
+fn write_name<W: Write>(w: &mut W, e: &Expr) -> Result<()> {
match e {
- Expr::Alias(Alias { name, .. }) => Ok(name.clone()),
- Expr::Column(c) => Ok(c.flat_name()),
- Expr::OuterReferenceColumn(_, c) => Ok(format!("outer_ref({})",
c.flat_name())),
- Expr::ScalarVariable(_, variable_names) =>
Ok(variable_names.join(".")),
- Expr::Literal(value) => Ok(format!("{value:?}")),
+ Expr::Alias(Alias { name, .. }) => write!(w, "{}", name)?,
+ Expr::Column(c) => write!(w, "{}", c.flat_name())?,
+ Expr::OuterReferenceColumn(_, c) => write!(w, "outer_ref({})",
c.flat_name())?,
+ Expr::ScalarVariable(_, variable_names) => {
+ write!(w, "{}", variable_names.join("."))?
+ }
+ Expr::Literal(value) => write!(w, "{value:?}")?,
Expr::BinaryExpr(binary_expr) => {
- let left = create_name(binary_expr.left.as_ref())?;
- let right = create_name(binary_expr.right.as_ref())?;
- Ok(format!("{} {} {}", left, binary_expr.op, right))
+ write_name(w, binary_expr.left.as_ref())?;
+ write!(w, " {} ", binary_expr.op)?;
+ write_name(w, binary_expr.right.as_ref())?;
}
Expr::Like(Like {
negated,
@@ -1680,19 +1694,17 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
escape_char,
case_insensitive,
}) => {
- let s = format!(
- "{} {}{} {} {}",
+ write!(
+ w,
+ "{} {}{} {}",
expr,
if *negated { "NOT " } else { "" },
if *case_insensitive { "ILIKE" } else { "LIKE" },
pattern,
- if let Some(char) = escape_char {
- format!("CHAR '{char}'")
- } else {
- "".to_string()
- }
- );
- Ok(s)
+ )?;
+ if let Some(char) = escape_char {
+ write!(w, " CHAR '{char}'")?;
+ }
}
Expr::SimilarTo(Like {
negated,
@@ -1701,8 +1713,9 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
escape_char,
case_insensitive: _,
}) => {
- let s = format!(
- "{} {} {} {}",
+ write!(
+ w,
+ "{} {} {}",
expr,
if *negated {
"NOT SIMILAR TO"
@@ -1710,114 +1723,119 @@ pub(crate) fn create_name(e: &Expr) -> Result<String>
{
"SIMILAR TO"
},
pattern,
- if let Some(char) = escape_char {
- format!("CHAR '{char}'")
- } else {
- "".to_string()
- }
- );
- Ok(s)
+ )?;
+ if let Some(char) = escape_char {
+ write!(w, " CHAR '{char}'")?;
+ }
}
Expr::Case(case) => {
- let mut name = "CASE ".to_string();
+ write!(w, "CASE ")?;
if let Some(e) = &case.expr {
- let e = create_name(e)?;
- let _ = write!(name, "{e} ");
+ write_name(w, e)?;
+ w.write_str(" ")?;
}
- for (w, t) in &case.when_then_expr {
- let when = create_name(w)?;
- let then = create_name(t)?;
- let _ = write!(name, "WHEN {when} THEN {then} ");
+ for (when, then) in &case.when_then_expr {
+ w.write_str("WHEN ")?;
+ write_name(w, when)?;
+ w.write_str(" THEN ")?;
+ write_name(w, then)?;
+ w.write_str(" ")?;
}
if let Some(e) = &case.else_expr {
- let e = create_name(e)?;
- let _ = write!(name, "ELSE {e} ");
+ w.write_str("ELSE ")?;
+ write_name(w, e)?;
+ w.write_str(" ")?;
}
- name += "END";
- Ok(name)
+ w.write_str("END")?;
}
Expr::Cast(Cast { expr, .. }) => {
// CAST does not change the expression name
- create_name(expr)
+ write_name(w, expr)?;
}
Expr::TryCast(TryCast { expr, .. }) => {
// CAST does not change the expression name
- create_name(expr)
+ write_name(w, expr)?;
}
Expr::Not(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("NOT {expr}"))
+ w.write_str("NOT ")?;
+ write_name(w, expr)?;
}
Expr::Negative(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("(- {expr})"))
+ w.write_str("(- ")?;
+ write_name(w, expr)?;
+ w.write_str(")")?;
}
Expr::IsNull(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("{expr} IS NULL"))
+ write_name(w, expr)?;
+ w.write_str(" IS NULL")?;
}
Expr::IsNotNull(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("{expr} IS NOT NULL"))
+ write_name(w, expr)?;
+ w.write_str(" IS NOT NULL")?;
}
Expr::IsTrue(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("{expr} IS TRUE"))
+ write_name(w, expr)?;
+ w.write_str(" IS TRUE")?;
}
Expr::IsFalse(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("{expr} IS FALSE"))
+ write_name(w, expr)?;
+ w.write_str(" IS FALSE")?;
}
Expr::IsUnknown(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("{expr} IS UNKNOWN"))
+ write_name(w, expr)?;
+ w.write_str(" IS UNKNOWN")?;
}
Expr::IsNotTrue(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("{expr} IS NOT TRUE"))
+ write_name(w, expr)?;
+ w.write_str(" IS NOT TRUE")?;
}
Expr::IsNotFalse(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("{expr} IS NOT FALSE"))
+ write_name(w, expr)?;
+ w.write_str(" IS NOT FALSE")?;
}
Expr::IsNotUnknown(expr) => {
- let expr = create_name(expr)?;
- Ok(format!("{expr} IS NOT UNKNOWN"))
+ write_name(w, expr)?;
+ w.write_str(" IS NOT UNKNOWN")?;
}
- Expr::Exists(Exists { negated: true, .. }) => Ok("NOT
EXISTS".to_string()),
- Expr::Exists(Exists { negated: false, .. }) =>
Ok("EXISTS".to_string()),
- Expr::InSubquery(InSubquery { negated: true, .. }) => Ok("NOT
IN".to_string()),
- Expr::InSubquery(InSubquery { negated: false, .. }) =>
Ok("IN".to_string()),
+ Expr::Exists(Exists { negated: true, .. }) => w.write_str("NOT
EXISTS")?,
+ Expr::Exists(Exists { negated: false, .. }) => w.write_str("EXISTS")?,
+ Expr::InSubquery(InSubquery { negated: true, .. }) => w.write_str("NOT
IN")?,
+ Expr::InSubquery(InSubquery { negated: false, .. }) =>
w.write_str("IN")?,
Expr::ScalarSubquery(subquery) => {
- Ok(subquery.subquery.schema().field(0).name().clone())
+ w.write_str(subquery.subquery.schema().field(0).name().as_str())?;
}
Expr::GetIndexedField(GetIndexedField { expr, field }) => {
- let expr = create_name(expr)?;
+ write_name(w, expr)?;
match field {
- GetFieldAccess::NamedStructField { name } => {
- Ok(format!("{expr}[{name}]"))
- }
+ GetFieldAccess::NamedStructField { name } => write!(w,
"[{name}]")?,
GetFieldAccess::ListIndex { key } => {
- let key = create_name(key)?;
- Ok(format!("{expr}[{key}]"))
+ w.write_str("[")?;
+ write_name(w, key)?;
+ w.write_str("]")?;
}
GetFieldAccess::ListRange {
start,
stop,
stride,
} => {
- let start = create_name(start)?;
- let stop = create_name(stop)?;
- let stride = create_name(stride)?;
- Ok(format!("{expr}[{start}:{stop}:{stride}]"))
+ w.write_str("[")?;
+ write_name(w, start)?;
+ w.write_str(":")?;
+ write_name(w, stop)?;
+ w.write_str(":")?;
+ write_name(w, stride)?;
+ w.write_str("]")?;
}
}
}
Expr::Unnest(Unnest { expr }) => {
- let expr_name = create_name(expr)?;
- Ok(format!("unnest({expr_name})"))
+ w.write_str("unnest(")?;
+ write_name(w, expr)?;
+ w.write_str(")")?;
+ }
+ Expr::ScalarFunction(fun) => {
+ w.write_str(fun.func.display_name(&fun.args)?.as_str())?;
}
- Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args),
Expr::WindowFunction(WindowFunction {
fun,
args,
@@ -1826,23 +1844,21 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
order_by,
null_treatment,
}) => {
- let mut parts: Vec<String> =
- vec![create_function_name(&fun.to_string(), false, args)?];
-
+ write_function_name(w, &fun.to_string(), false, args)?;
if let Some(nt) = null_treatment {
- parts.push(format!("{}", nt));
+ w.write_str(" ")?;
+ write!(w, "{}", nt)?;
}
-
if !partition_by.is_empty() {
- parts.push(format!("PARTITION BY [{}]",
expr_vec_fmt!(partition_by)));
+ w.write_str(" ")?;
+ write!(w, "PARTITION BY [{}]", expr_vec_fmt!(partition_by))?;
}
-
if !order_by.is_empty() {
- parts.push(format!("ORDER BY [{}]", expr_vec_fmt!(order_by)));
+ w.write_str(" ")?;
+ write!(w, "ORDER BY [{}]", expr_vec_fmt!(order_by))?;
}
-
- parts.push(format!("{window_frame}"));
- Ok(parts.join(" "))
+ w.write_str(" ")?;
+ write!(w, "{window_frame}")?;
}
Expr::AggregateFunction(AggregateFunction {
func_def,
@@ -1852,48 +1868,48 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
order_by,
null_treatment,
}) => {
- let name = match func_def {
+ match func_def {
AggregateFunctionDefinition::BuiltIn(..) => {
- create_function_name(func_def.name(), *distinct, args)?
+ write_function_name(w, func_def.name(), *distinct, args)?;
}
- AggregateFunctionDefinition::UDF(..) => {
- let names: Vec<String> =
- args.iter().map(create_name).collect::<Result<_>>()?;
- names.join(",")
+ AggregateFunctionDefinition::UDF(fun) => {
+ write!(w, "{}(", fun.name())?;
+ write_names_join(w, args, ",")?;
+ write!(w, ")")?;
}
};
- let mut info = String::new();
if let Some(fe) = filter {
- info += &format!(" FILTER (WHERE {fe})");
+ write!(w, " FILTER (WHERE {fe})")?;
};
if let Some(order_by) = order_by {
- info += &format!(" ORDER BY [{}]", expr_vec_fmt!(order_by));
+ write!(w, " ORDER BY [{}]", expr_vec_fmt!(order_by))?;
};
if let Some(nt) = null_treatment {
- info += &format!(" {}", nt);
- }
- match func_def {
- AggregateFunctionDefinition::BuiltIn(..) => {
- Ok(format!("{}{}", name, info))
- }
- AggregateFunctionDefinition::UDF(fun) => {
- Ok(format!("{}({}){}", fun.name(), name, info))
- }
+ write!(w, " {}", nt)?;
}
}
Expr::GroupingSet(grouping_set) => match grouping_set {
GroupingSet::Rollup(exprs) => {
- Ok(format!("ROLLUP ({})", create_names(exprs.as_slice())?))
+ write!(w, "ROLLUP (")?;
+ write_names(w, exprs.as_slice())?;
+ write!(w, ")")?;
}
GroupingSet::Cube(exprs) => {
- Ok(format!("CUBE ({})", create_names(exprs.as_slice())?))
+ write!(w, "CUBE (")?;
+ write_names(w, exprs.as_slice())?;
+ write!(w, ")")?;
}
GroupingSet::GroupingSets(lists_of_exprs) => {
- let mut list_of_names = vec![];
- for exprs in lists_of_exprs {
- list_of_names.push(format!("({})",
create_names(exprs.as_slice())?));
+ write!(w, "GROUPING SETS (")?;
+ for (i, exprs) in lists_of_exprs.iter().enumerate() {
+ if i != 0 {
+ write!(w, ", ")?;
+ }
+ write!(w, "(")?;
+ write_names(w, exprs.as_slice())?;
+ write!(w, ")")?;
}
- Ok(format!("GROUPING SETS ({})", list_of_names.join(", ")))
+ write!(w, ")")?;
}
},
Expr::InList(InList {
@@ -1901,12 +1917,12 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
list,
negated,
}) => {
- let expr = create_name(expr)?;
+ write_name(w, expr)?;
let list = list.iter().map(create_name);
if *negated {
- Ok(format!("{expr} NOT IN ({list:?})"))
+ write!(w, " NOT IN ({list:?})")?;
} else {
- Ok(format!("{expr} IN ({list:?})"))
+ write!(w, " IN ({list:?})")?;
}
}
Expr::Between(Between {
@@ -1915,35 +1931,46 @@ pub(crate) fn create_name(e: &Expr) -> Result<String> {
low,
high,
}) => {
- let expr = create_name(expr)?;
- let low = create_name(low)?;
- let high = create_name(high)?;
+ write_name(w, expr)?;
if *negated {
- Ok(format!("{expr} NOT BETWEEN {low} AND {high}"))
+ write!(w, " NOT BETWEEN ")?;
} else {
- Ok(format!("{expr} BETWEEN {low} AND {high}"))
+ write!(w, " BETWEEN ")?;
}
+ write_name(w, low)?;
+ write!(w, " AND ")?;
+ write_name(w, high)?;
}
Expr::Sort { .. } => {
- internal_err!("Create name does not support sort expression")
+ return internal_err!("Create name does not support sort
expression")
}
Expr::Wildcard { qualifier } => match qualifier {
- Some(qualifier) => internal_err!(
- "Create name does not support qualified wildcard, got
{qualifier}"
- ),
- None => Ok("*".to_string()),
+ Some(qualifier) => {
+ return internal_err!(
+ "Create name does not support qualified wildcard, got
{qualifier}"
+ )
+ }
+ None => write!(w, "*")?,
},
- Expr::Placeholder(Placeholder { id, .. }) => Ok((*id).to_string()),
- }
+ Expr::Placeholder(Placeholder { id, .. }) => write!(w, "{}", id)?,
+ };
+ Ok(())
}
-/// Create a comma separated list of names from a list of expressions
-fn create_names(exprs: &[Expr]) -> Result<String> {
- Ok(exprs
- .iter()
- .map(create_name)
- .collect::<Result<Vec<String>>>()?
- .join(", "))
+fn write_names<W: Write>(w: &mut W, exprs: &[Expr]) -> Result<()> {
+ exprs.iter().try_for_each(|e| write_name(w, e))
+}
+
+fn write_names_join<W: Write>(w: &mut W, exprs: &[Expr], sep: &str) ->
Result<()> {
+ let mut iter = exprs.iter();
+ if let Some(first_arg) = iter.next() {
+ write_name(w, first_arg)?;
+ }
+ for a in iter {
+ w.write_str(sep)?;
+ write_name(w, a)?;
+ }
+ Ok(())
}
#[cfg(test)]
diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs
b/datafusion/optimizer/src/analyzer/type_coercion.rs
index 994adf7327..60b81aff9a 100644
--- a/datafusion/optimizer/src/analyzer/type_coercion.rs
+++ b/datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -1155,7 +1155,7 @@ mod test {
let like_expr = Expr::Like(Like::new(false, expr, pattern, None,
false));
let empty = empty_with_type(DataType::Utf8);
let plan =
LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty)?);
- let expected = "Projection: a LIKE CAST(NULL AS Utf8) AS a LIKE NULL \
+ let expected = "Projection: a LIKE CAST(NULL AS Utf8) AS a LIKE NULL\
\n EmptyRelation";
assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan,
expected)?;
@@ -1184,7 +1184,7 @@ mod test {
let ilike_expr = Expr::Like(Like::new(false, expr, pattern, None,
true));
let empty = empty_with_type(DataType::Utf8);
let plan =
LogicalPlan::Projection(Projection::try_new(vec![ilike_expr], empty)?);
- let expected = "Projection: a ILIKE CAST(NULL AS Utf8) AS a ILIKE NULL
\
+ let expected = "Projection: a ILIKE CAST(NULL AS Utf8) AS a ILIKE NULL\
\n EmptyRelation";
assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan,
expected)?;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]