This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new ff728d6c75 Remove need for sort in new_with_metadata (#8855)
ff728d6c75 is described below
commit ff728d6c75eb0eef048d1b2f61a73bf750d2814e
Author: Simon Vandel Sillesen <[email protected]>
AuthorDate: Mon Jan 15 08:21:38 2024 +0000
Remove need for sort in new_with_metadata (#8855)
BTreeSet iterates in sorted (and therefore stable) order, so we don't need to sort
Speeds up benchmarks in sql_planner.rs by 3-8%
---
datafusion/common/src/dfschema.rs | 16 +++++-----------
1 file changed, 5 insertions(+), 11 deletions(-)
diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs
index 85b97aac03..c715fad112 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -18,7 +18,7 @@
//! DFSchema is an extended schema struct that DataFusion uses to provide support for
//! fields with optional relation names.
-use std::collections::{HashMap, HashSet};
+use std::collections::{BTreeSet, HashMap};
use std::convert::TryFrom;
use std::fmt::{Display, Formatter};
use std::hash::Hash;
@@ -135,8 +135,8 @@ impl DFSchema {
fields: Vec<DFField>,
metadata: HashMap<String, String>,
) -> Result<Self> {
- let mut qualified_names = HashSet::new();
- let mut unqualified_names = HashSet::new();
+ let mut qualified_names = BTreeSet::new();
+ let mut unqualified_names = BTreeSet::new();
for field in &fields {
if let Some(qualifier) = field.qualifier() {
@@ -148,14 +148,8 @@ impl DFSchema {
}
}
- // check for mix of qualified and unqualified field with same unqualified name
- // note that we need to sort the contents of the HashSet first so that errors are
- // deterministic
- let mut qualified_names = qualified_names
- .iter()
- .map(|(l, r)| (l.to_owned(), r.to_owned()))
- .collect::<Vec<(&OwnedTableReference, &String)>>();
- qualified_names.sort();
+ // Check for mix of qualified and unqualified fields with same unqualified name.
+ // The BTreeSet storage makes sure that errors are reported in deterministic order.
for (qualifier, name) in &qualified_names {
if unqualified_names.contains(name) {
return _schema_err!(SchemaError::AmbiguousReference {