This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new b9328b9734 Upgrade to sqlparser 0.61.0 (#20177)
b9328b9734 is described below
commit b9328b9734534c8e2ce92b00c5368f88956bf5c3
Author: Andrew Lamb <[email protected]>
AuthorDate: Mon Feb 23 13:49:08 2026 -0500
Upgrade to sqlparser 0.61.0 (#20177)
DRAFT until SQL parser is released
## Which issue does this PR close?
- part of https://github.com/apache/datafusion-sqlparser-rs/issues/2117
## Rationale for this change
Keep up to date with dependencies
I think @Samyak2 specifically would like access to the `:` field syntax
## What changes are included in this PR?
1. Update to 0.61.0
2. Update APIs
## Are these changes tested?
Yes, by existing tests
## Are there any user-facing changes?
New dependency version (sqlparser 0.61.0)
---------
Co-authored-by: Jeffrey Vo <[email protected]>
---
Cargo.lock | 8 +-
Cargo.toml | 2 +-
datafusion/sql/src/expr/mod.rs | 6 ++
datafusion/sql/src/select.rs | 1 +
datafusion/sql/src/statement.rs | 117 +++++++++++++++-----------
datafusion/sql/src/unparser/ast.rs | 7 +-
datafusion/sql/src/unparser/dialect.rs | 1 +
datafusion/sql/src/unparser/expr.rs | 7 ++
datafusion/sql/src/utils.rs | 2 +
datafusion/sqllogictest/test_files/select.slt | 27 +++++-
10 files changed, 123 insertions(+), 55 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index 1d4229ac25..99abf5b060 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5750,9 +5750,9 @@ dependencies = [
[[package]]
name = "sqlparser"
-version = "0.60.0"
+version = "0.61.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "505aa16b045c4c1375bf5f125cce3813d0176325bfe9ffc4a903f423de7774ff"
+checksum = "dbf5ea8d4d7c808e1af1cbabebca9a2abe603bcefc22294c5b95018d53200cb7"
dependencies = [
"log",
"recursive",
@@ -5761,9 +5761,9 @@ dependencies = [
[[package]]
name = "sqlparser_derive"
-version = "0.4.0"
+version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "028e551d5e270b31b9f3ea271778d9d827148d4287a5d96167b6bb9787f5cc38"
+checksum = "a6dd45d8fc1c79299bfbb7190e42ccbbdf6a5f52e4a6ad98d92357ea965bd289"
dependencies = [
"proc-macro2",
"quote",
diff --git a/Cargo.toml b/Cargo.toml
index 60904a70c9..0fa416cc05 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -183,7 +183,7 @@ regex = "1.12"
rstest = "0.26.1"
serde_json = "1"
sha2 = "^0.10.9"
-sqlparser = { version = "0.60.0", default-features = false, features = ["std", "visitor"] }
+sqlparser = { version = "0.61.0", default-features = false, features = ["std", "visitor"] }
strum = "0.27.2"
strum_macros = "0.27.2"
tempfile = "3"
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index dbf2ce6773..9aa5be8131 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -267,11 +267,16 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
planner_context,
),
+ SQLExpr::Cast { array: true, .. } => {
+ not_impl_err!("`CAST(... AS type ARRAY`) not supported")
+ }
+
SQLExpr::Cast {
kind: CastKind::Cast | CastKind::DoubleColon,
expr,
data_type,
format,
+ array: false,
} => {
self.sql_cast_to_expr(*expr, &data_type, format, schema,
planner_context)
}
@@ -281,6 +286,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
expr,
data_type,
format,
+ array: false,
} => {
if let Some(format) = format {
return not_impl_err!("CAST with format is not supported:
{format}");
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index 28e7ac2f20..edf4b9ef79 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -361,6 +361,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
// Process distinct clause
let plan = match select.distinct {
None => Ok(plan),
+ Some(Distinct::All) => Ok(plan),
Some(Distinct::Distinct) => {
LogicalPlanBuilder::from(plan).distinct()?.build()
}
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 14ec64f874..32bc8cb244 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -342,26 +342,28 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
refresh_mode,
initialize,
require_user,
+ partition_of,
+ for_values,
}) => {
if temporary {
- return not_impl_err!("Temporary tables not supported")?;
+ return not_impl_err!("Temporary tables not supported");
}
if external {
- return not_impl_err!("External tables not supported")?;
+ return not_impl_err!("External tables not supported");
}
if global.is_some() {
- return not_impl_err!("Global tables not supported")?;
+ return not_impl_err!("Global tables not supported");
}
if transient {
- return not_impl_err!("Transient tables not supported")?;
+ return not_impl_err!("Transient tables not supported");
}
if volatile {
- return not_impl_err!("Volatile tables not supported")?;
+ return not_impl_err!("Volatile tables not supported");
}
if hive_distribution != ast::HiveDistributionStyle::NONE {
return not_impl_err!(
"Hive distribution not supported:
{hive_distribution:?}"
- )?;
+ );
}
if hive_formats.is_some()
&& !matches!(
@@ -374,122 +376,126 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
})
)
{
- return not_impl_err!(
- "Hive formats not supported: {hive_formats:?}"
- )?;
+ return not_impl_err!("Hive formats not supported:
{hive_formats:?}");
}
if file_format.is_some() {
- return not_impl_err!("File format not supported")?;
+ return not_impl_err!("File format not supported");
}
if location.is_some() {
- return not_impl_err!("Location not supported")?;
+ return not_impl_err!("Location not supported");
}
if without_rowid {
- return not_impl_err!("Without rowid not supported")?;
+ return not_impl_err!("Without rowid not supported");
}
if like.is_some() {
- return not_impl_err!("Like not supported")?;
+ return not_impl_err!("Like not supported");
}
if clone.is_some() {
- return not_impl_err!("Clone not supported")?;
+ return not_impl_err!("Clone not supported");
}
if comment.is_some() {
- return not_impl_err!("Comment not supported")?;
+ return not_impl_err!("Comment not supported");
}
if on_commit.is_some() {
- return not_impl_err!("On commit not supported")?;
+ return not_impl_err!("On commit not supported");
}
if on_cluster.is_some() {
- return not_impl_err!("On cluster not supported")?;
+ return not_impl_err!("On cluster not supported");
}
if primary_key.is_some() {
- return not_impl_err!("Primary key not supported")?;
+ return not_impl_err!("Primary key not supported");
}
if order_by.is_some() {
- return not_impl_err!("Order by not supported")?;
+ return not_impl_err!("Order by not supported");
}
if partition_by.is_some() {
- return not_impl_err!("Partition by not supported")?;
+ return not_impl_err!("Partition by not supported");
}
if cluster_by.is_some() {
- return not_impl_err!("Cluster by not supported")?;
+ return not_impl_err!("Cluster by not supported");
}
if clustered_by.is_some() {
- return not_impl_err!("Clustered by not supported")?;
+ return not_impl_err!("Clustered by not supported");
}
if strict {
- return not_impl_err!("Strict not supported")?;
+ return not_impl_err!("Strict not supported");
}
if copy_grants {
- return not_impl_err!("Copy grants not supported")?;
+ return not_impl_err!("Copy grants not supported");
}
if enable_schema_evolution.is_some() {
- return not_impl_err!("Enable schema evolution not
supported")?;
+ return not_impl_err!("Enable schema evolution not
supported");
}
if change_tracking.is_some() {
- return not_impl_err!("Change tracking not supported")?;
+ return not_impl_err!("Change tracking not supported");
}
if data_retention_time_in_days.is_some() {
- return not_impl_err!("Data retention time in days not
supported")?;
+ return not_impl_err!("Data retention time in days not
supported");
}
if max_data_extension_time_in_days.is_some() {
return not_impl_err!(
"Max data extension time in days not supported"
- )?;
+ );
}
if default_ddl_collation.is_some() {
- return not_impl_err!("Default DDL collation not
supported")?;
+ return not_impl_err!("Default DDL collation not
supported");
}
if with_aggregation_policy.is_some() {
- return not_impl_err!("With aggregation policy not
supported")?;
+ return not_impl_err!("With aggregation policy not
supported");
}
if with_row_access_policy.is_some() {
- return not_impl_err!("With row access policy not
supported")?;
+ return not_impl_err!("With row access policy not
supported");
}
if with_tags.is_some() {
- return not_impl_err!("With tags not supported")?;
+ return not_impl_err!("With tags not supported");
}
if iceberg {
- return not_impl_err!("Iceberg not supported")?;
+ return not_impl_err!("Iceberg not supported");
}
if external_volume.is_some() {
- return not_impl_err!("External volume not supported")?;
+ return not_impl_err!("External volume not supported");
}
if base_location.is_some() {
- return not_impl_err!("Base location not supported")?;
+ return not_impl_err!("Base location not supported");
}
if catalog.is_some() {
- return not_impl_err!("Catalog not supported")?;
+ return not_impl_err!("Catalog not supported");
}
if catalog_sync.is_some() {
- return not_impl_err!("Catalog sync not supported")?;
+ return not_impl_err!("Catalog sync not supported");
}
if storage_serialization_policy.is_some() {
- return not_impl_err!("Storage serialization policy not
supported")?;
+ return not_impl_err!("Storage serialization policy not
supported");
}
if inherits.is_some() {
- return not_impl_err!("Table inheritance not supported")?;
+ return not_impl_err!("Table inheritance not supported");
}
if dynamic {
- return not_impl_err!("Dynamic tables not supported")?;
+ return not_impl_err!("Dynamic tables not supported");
}
if version.is_some() {
- return not_impl_err!("Version not supported")?;
+ return not_impl_err!("Version not supported");
}
if target_lag.is_some() {
- return not_impl_err!("Target lag not supported")?;
+ return not_impl_err!("Target lag not supported");
}
if warehouse.is_some() {
- return not_impl_err!("Warehouse not supported")?;
+ return not_impl_err!("Warehouse not supported");
}
if refresh_mode.is_some() {
- return not_impl_err!("Refresh mode not supported")?;
+ return not_impl_err!("Refresh mode not supported");
}
if initialize.is_some() {
- return not_impl_err!("Initialize not supported")?;
+ return not_impl_err!("Initialize not supported");
}
if require_user {
- return not_impl_err!("Require user not supported")?;
+ return not_impl_err!("Require user not supported");
+ }
+ if partition_of.is_some() {
+ return not_impl_err!("PARTITION OF not supported");
+ }
+ if for_values.is_some() {
+ return not_impl_err!("PARTITION OF .. FOR VALUES .. not
supported");
}
// Merge inline constraints and existing constraints
let mut all_constraints = constraints;
@@ -989,7 +995,8 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
has_table_keyword,
settings,
format_clause,
- insert_token: _insert_token, // record the location the
`INSERT` token
+ insert_token: _, // records the location of the `INSERT` token
+ optimizer_hint,
}) => {
let table_name = match table {
TableObject::TableName(table_name) => table_name,
@@ -1045,6 +1052,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
if format_clause.is_some() {
plan_err!("Inserts with format clause not supported")?;
}
+ if optimizer_hint.is_some() {
+ plan_err!("Optimizer hints not supported")?;
+ }
// optional keywords don't change behavior
let _ = into;
let _ = has_table_keyword;
@@ -1059,6 +1069,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
or,
limit,
update_token: _,
+ optimizer_hint,
}) => {
let from_clauses =
from.map(|update_table_from_kind| match
update_table_from_kind {
@@ -1079,6 +1090,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
if limit.is_some() {
return not_impl_err!("Update-limit clause not supported")?;
}
+ if optimizer_hint.is_some() {
+ plan_err!("Optimizer hints not supported")?;
+ }
self.update_to_plan(table, &assignments, update_from,
selection)
}
@@ -1091,6 +1105,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
order_by,
limit,
delete_token: _,
+ optimizer_hint,
}) => {
if !tables.is_empty() {
plan_err!("DELETE <TABLE> not supported")?;
@@ -1108,6 +1123,10 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
plan_err!("Delete-order-by clause not yet supported")?;
}
+ if optimizer_hint.is_some() {
+ plan_err!("Optimizer hints not supported")?;
+ }
+
let table_name = self.get_delete_target(from)?;
self.delete_to_plan(&table_name, selection, limit)
}
@@ -1393,6 +1412,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
cascade,
on_cluster,
table,
+ if_exists,
}) => {
let _ = table; // Support TRUNCATE TABLE and TRUNCATE syntax
if table_names.len() != 1 {
@@ -1421,6 +1441,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
if on_cluster.is_some() {
return not_impl_err!("TRUNCATE with ON CLUSTER is not
supported");
}
+ if if_exists {
+ return not_impl_err!("TRUNCATE .. with IF EXISTS is not
supported");
+ }
let table =
self.object_name_to_table_reference(target.name.clone())?;
let source =
self.context_provider.get_table_source(table.clone())?;
diff --git a/datafusion/sql/src/unparser/ast.rs b/datafusion/sql/src/unparser/ast.rs
index ec78a42d65..8446a44b07 100644
--- a/datafusion/sql/src/unparser/ast.rs
+++ b/datafusion/sql/src/unparser/ast.rs
@@ -315,7 +315,9 @@ impl SelectBuilder {
}
pub fn build(&self) -> Result<ast::Select, BuilderError> {
Ok(ast::Select {
+ optimizer_hint: None,
distinct: self.distinct.clone(),
+ select_modifiers: None,
top_before_distinct: false,
top: self.top.clone(),
projection: self.projection.clone().unwrap_or_default(),
@@ -340,12 +342,12 @@ impl SelectBuilder {
named_window: self.named_window.clone(),
qualify: self.qualify.clone(),
value_table_mode: self.value_table_mode,
- connect_by: None,
+ connect_by: Vec::new(),
window_before_qualify: false,
prewhere: None,
select_token: AttachedToken::empty(),
flavor: match self.flavor {
- Some(ref value) => value.clone(),
+ Some(ref value) => *value,
None => return
Err(Into::into(UninitializedFieldError::from("flavor"))),
},
exclude: None,
@@ -608,6 +610,7 @@ impl DerivedRelationBuilder {
}
},
alias: self.alias.clone(),
+ sample: None,
})
}
fn create_empty() -> Self {
diff --git a/datafusion/sql/src/unparser/dialect.rs b/datafusion/sql/src/unparser/dialect.rs
index 1a3e1a06db..31d2662cc4 100644
--- a/datafusion/sql/src/unparser/dialect.rs
+++ b/datafusion/sql/src/unparser/dialect.rs
@@ -372,6 +372,7 @@ impl PostgreSqlDialect {
kind: ast::CastKind::Cast,
expr: Box::new(expr.clone()),
data_type:
ast::DataType::Numeric(ast::ExactNumberInfo::None),
+ array: false,
format: None,
};
}
diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs
index 5f6612830a..59a9207b51 100644
--- a/datafusion/sql/src/unparser/expr.rs
+++ b/datafusion/sql/src/unparser/expr.rs
@@ -494,6 +494,7 @@ impl Unparser<'_> {
kind: ast::CastKind::TryCast,
expr: Box::new(inner_expr),
data_type: self.arrow_dtype_to_ast_dtype(data_type)?,
+ array: false,
format: None,
})
}
@@ -1145,6 +1146,7 @@ impl Unparser<'_> {
kind: ast::CastKind::Cast,
expr: Box::new(ast::Expr::value(SingleQuotedString(ts))),
data_type: self.dialect.timestamp_cast_dtype(&time_unit, &None),
+ array: false,
format: None,
})
}
@@ -1167,6 +1169,7 @@ impl Unparser<'_> {
kind: ast::CastKind::Cast,
expr: Box::new(ast::Expr::value(SingleQuotedString(time))),
data_type: ast::DataType::Time(None, TimezoneInfo::None),
+ array: false,
format: None,
})
}
@@ -1184,6 +1187,7 @@ impl Unparser<'_> {
kind: ast::CastKind::Cast,
expr: Box::new(inner_expr),
data_type: self.arrow_dtype_to_ast_dtype(data_type)?,
+ array: false,
format: None,
}),
},
@@ -1191,6 +1195,7 @@ impl Unparser<'_> {
kind: ast::CastKind::Cast,
expr: Box::new(inner_expr),
data_type: self.arrow_dtype_to_ast_dtype(data_type)?,
+ array: false,
format: None,
}),
}
@@ -1332,6 +1337,7 @@ impl Unparser<'_> {
date.to_string(),
))),
data_type: ast::DataType::Date,
+ array: false,
format: None,
})
}
@@ -1355,6 +1361,7 @@ impl Unparser<'_> {
datetime.to_string(),
))),
data_type: self.ast_type_for_date64_in_cast(),
+ array: false,
format: None,
})
}
diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs
index 9205336a52..16ac353d4b 100644
--- a/datafusion/sql/src/utils.rs
+++ b/datafusion/sql/src/utils.rs
@@ -331,6 +331,8 @@ pub(crate) fn value_to_string(value: &Value) ->
Option<String> {
Value::Number(_, _) | Value::Boolean(_) => Some(value.to_string()),
Value::UnicodeStringLiteral(s) => Some(s.to_string()),
Value::EscapedStringLiteral(s) => Some(s.to_string()),
+ Value::QuoteDelimitedStringLiteral(s)
+ | Value::NationalQuoteDelimitedStringLiteral(s) =>
Some(s.value.to_string()),
Value::DoubleQuotedString(_)
| Value::NationalStringLiteral(_)
| Value::SingleQuotedByteStringLiteral(_)
diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt
index d49ccb9fe9..553ccb74de 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -820,7 +820,7 @@ SELECT ALL c1 FROM aggregate_simple order by c1
0.00005
0.00005
-# select distinct
+# SELECT DISTINCT
query RRB rowsort
SELECT DISTINCT * FROM aggregate_simple
----
@@ -830,6 +830,31 @@ SELECT DISTINCT * FROM aggregate_simple
0.00004 0.000000000004 false
0.00005 0.000000000005 true
+# select ALL (inverse of distinct)
+query RRB rowsort
+SELECT ALL * FROM aggregate_simple;
+----
+0.00001 0.000000000001 true
+0.00002 0.000000000002 false
+0.00002 0.000000000002 false
+0.00003 0.000000000003 true
+0.00003 0.000000000003 true
+0.00003 0.000000000003 true
+0.00004 0.000000000004 false
+0.00004 0.000000000004 false
+0.00004 0.000000000004 false
+0.00004 0.000000000004 false
+0.00005 0.000000000005 true
+0.00005 0.000000000005 true
+0.00005 0.000000000005 true
+0.00005 0.000000000005 true
+0.00005 0.000000000005 true
+
+
+# select distinct all (rejected by the parser)
+query error DataFusion error: SQL error: ParserError\("Cannot specify DISTINCT then ALL at Line: 1, Column: 8"\)
+SELECT DISTINCT ALL * FROM aggregate_simple
+
# select distinct with projection and order by
query R
SELECT DISTINCT c1 FROM aggregate_simple order by c1
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]