This is an automated email from the ASF dual-hosted git repository.
iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 0647a4aa Consolidate `MapAccess`, and `Subscript` into `CompoundExpr`
to handle the complex field access chain (#1551)
0647a4aa is described below
commit 0647a4aa829954397bd72369865f44bbab19ba2b
Author: Jax Liu <[email protected]>
AuthorDate: Sun Dec 22 22:28:44 2024 +0800
Consolidate `MapAccess`, and `Subscript` into `CompoundExpr` to handle the
complex field access chain (#1551)
---
src/ast/mod.rs | 106 ++++++++--------
src/ast/spans.rs | 44 ++++---
src/dialect/snowflake.rs | 4 +
src/parser/mod.rs | 284 ++++++++++++++++++++++++++++--------------
tests/sqlparser_bigquery.rs | 58 ++++++---
tests/sqlparser_clickhouse.rs | 24 ++--
tests/sqlparser_common.rs | 76 ++++++++---
tests/sqlparser_duckdb.rs | 8 +-
tests/sqlparser_postgres.rs | 138 ++++++++++----------
9 files changed, 455 insertions(+), 287 deletions(-)
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 45dbba2a..9fb2bb9c 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -459,40 +459,6 @@ pub enum CastFormat {
ValueAtTimeZone(Value, Value),
}
-/// Represents the syntax/style used in a map access.
-#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
-pub enum MapAccessSyntax {
- /// Access using bracket notation. `mymap[mykey]`
- Bracket,
- /// Access using period notation. `mymap.mykey`
- Period,
-}
-
-/// Expression used to access a value in a nested structure.
-///
-/// Example: `SAFE_OFFSET(0)` in
-/// ```sql
-/// SELECT mymap[SAFE_OFFSET(0)];
-/// ```
-#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
-#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
-#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
-pub struct MapAccessKey {
- pub key: Expr,
- pub syntax: MapAccessSyntax,
-}
-
-impl fmt::Display for MapAccessKey {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match self.syntax {
- MapAccessSyntax::Bracket => write!(f, "[{}]", self.key),
- MapAccessSyntax::Period => write!(f, ".{}", self.key),
- }
- }
-}
-
/// An element of a JSON path.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -629,6 +595,28 @@ pub enum Expr {
Identifier(Ident),
/// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col`
CompoundIdentifier(Vec<Ident>),
+ /// Multi-part expression access.
+ ///
+ /// This structure represents an access chain in structured / nested types
+ /// such as maps, arrays, and lists:
+ /// - Array
+ /// - A 1-dim array `a[1]` will be represented like:
+ /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1)])`
+ /// - A 2-dim array `a[1][2]` will be represented like:
+ /// `CompoundFieldAccess(Ident('a'), vec![Subscript(1),
Subscript(2)])`
+ /// - Map or Struct (Bracket-style)
+ /// - A map `a['field1']` will be represented like:
+ /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1')])`
+ /// - A 2-dim map `a['field1']['field2']` will be represented like:
+ /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'),
Subscript('field2')])`
+ /// - Struct (Dot-style) (only takes effect when the chain contains both
subscript and dot accesses)
+ /// - A struct access `a['field1'].field2` will be represented like:
+ /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'),
Dot(Ident('field2'))])`
+ /// - A pure dot chain such as `a.field1.field2` is instead represented
by `CompoundIdentifier([a, field1, field2])`
+ CompoundFieldAccess {
+ root: Box<Expr>,
+ access_chain: Vec<AccessExpr>,
+ },
/// Access data nested in a value containing semi-structured data, such as
/// the `VARIANT` type on Snowflake. for example `src:customer[0].name`.
///
@@ -882,14 +870,6 @@ pub enum Expr {
data_type: DataType,
value: String,
},
- /// Access a map-like object by field (e.g. `column['field']` or
`column[4]`
- /// Note that depending on the dialect, struct like accesses may be
- /// parsed as [`Subscript`](Self::Subscript) or
[`MapAccess`](Self::MapAccess)
- /// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
- MapAccess {
- column: Box<Expr>,
- keys: Vec<MapAccessKey>,
- },
/// Scalar function call e.g. `LEFT(foo, 5)`
Function(Function),
/// Arbitrary expr method call
@@ -978,11 +958,6 @@ pub enum Expr {
/// ```
/// [1]: https://duckdb.org/docs/sql/data_types/map#creating-maps
Map(Map),
- /// An access of nested data using subscript syntax, for example
`array[2]`.
- Subscript {
- expr: Box<Expr>,
- subscript: Box<Subscript>,
- },
/// An array expression e.g. `ARRAY[1, 2]`
Array(Array),
/// An interval expression e.g. `INTERVAL '1' YEAR`
@@ -1099,6 +1074,27 @@ impl fmt::Display for Subscript {
}
}
+/// An element of a [`Expr::CompoundFieldAccess`].
+/// It can be an expression or a subscript.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum AccessExpr {
+ /// Accesses a field using dot notation, e.g. `foo.bar.baz`.
+ Dot(Expr),
+ /// Accesses a field or array element using bracket notation, e.g.
`foo['bar']`.
+ Subscript(Subscript),
+}
+
+impl fmt::Display for AccessExpr {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self {
+ AccessExpr::Dot(expr) => write!(f, ".{}", expr),
+ AccessExpr::Subscript(subscript) => write!(f, "[{}]", subscript),
+ }
+ }
+}
+
/// A lambda function.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -1295,12 +1291,16 @@ impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Expr::Identifier(s) => write!(f, "{s}"),
- Expr::MapAccess { column, keys } => {
- write!(f, "{column}{}", display_separated(keys, ""))
- }
Expr::Wildcard(_) => f.write_str("*"),
Expr::QualifiedWildcard(prefix, _) => write!(f, "{}.*", prefix),
Expr::CompoundIdentifier(s) => write!(f, "{}",
display_separated(s, ".")),
+ Expr::CompoundFieldAccess { root, access_chain } => {
+ write!(f, "{}", root)?;
+ for field in access_chain {
+ write!(f, "{}", field)?;
+ }
+ Ok(())
+ }
Expr::IsTrue(ast) => write!(f, "{ast} IS TRUE"),
Expr::IsNotTrue(ast) => write!(f, "{ast} IS NOT TRUE"),
Expr::IsFalse(ast) => write!(f, "{ast} IS FALSE"),
@@ -1720,12 +1720,6 @@ impl fmt::Display for Expr {
Expr::Map(map) => {
write!(f, "{map}")
}
- Expr::Subscript {
- expr,
- subscript: key,
- } => {
- write!(f, "{expr}[{key}]")
- }
Expr::Array(set) => {
write!(f, "{set}")
}
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 6168587c..9ba3bdd9 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -20,20 +20,20 @@ use core::iter;
use crate::tokenizer::Span;
use super::{
- dcl::SecondaryRoles, AlterColumnOperation, AlterIndexOperation,
AlterTableOperation, Array,
- Assignment, AssignmentTarget, CloseCursor, ClusteredIndex, ColumnDef,
ColumnOption,
- ColumnOptionDef, ConflictTarget, ConnectBy, ConstraintCharacteristics,
CopySource, CreateIndex,
- CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, ExceptSelectItem,
ExcludeSelectItem,
- Expr, ExprWithAlias, Fetch, FromTable, Function, FunctionArg,
FunctionArgExpr,
- FunctionArgumentClause, FunctionArgumentList, FunctionArguments,
GroupByExpr, HavingBound,
- IlikeSelectItem, Insert, Interpolate, InterpolateExpr, Join,
JoinConstraint, JoinOperator,
- JsonPath, JsonPathElem, LateralView, MatchRecognizePattern, Measure,
NamedWindowDefinition,
- ObjectName, Offset, OnConflict, OnConflictAction, OnInsert, OrderBy,
OrderByExpr, Partition,
- PivotValueSource, ProjectionSelect, Query, ReferentialAction,
RenameSelectItem,
- ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem,
SetExpr, SqlOption,
- Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef,
TableConstraint,
- TableFactor, TableOptionsClustered, TableWithJoins, Use, Value, Values,
ViewColumnDef,
- WildcardAdditionalOptions, With, WithFill,
+ dcl::SecondaryRoles, AccessExpr, AlterColumnOperation, AlterIndexOperation,
+ AlterTableOperation, Array, Assignment, AssignmentTarget, CloseCursor,
ClusteredIndex,
+ ColumnDef, ColumnOption, ColumnOptionDef, ConflictTarget, ConnectBy,
ConstraintCharacteristics,
+ CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete,
DoUpdate,
+ ExceptSelectItem, ExcludeSelectItem, Expr, ExprWithAlias, Fetch,
FromTable, Function,
+ FunctionArg, FunctionArgExpr, FunctionArgumentClause,
FunctionArgumentList, FunctionArguments,
+ GroupByExpr, HavingBound, IlikeSelectItem, Insert, Interpolate,
InterpolateExpr, Join,
+ JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView,
MatchRecognizePattern,
+ Measure, NamedWindowDefinition, ObjectName, Offset, OnConflict,
OnConflictAction, OnInsert,
+ OrderBy, OrderByExpr, Partition, PivotValueSource, ProjectionSelect,
Query, ReferentialAction,
+ RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select,
SelectInto, SelectItem,
+ SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias,
TableAliasColumnDef,
+ TableConstraint, TableFactor, TableOptionsClustered, TableWithJoins, Use,
Value, Values,
+ ViewColumnDef, WildcardAdditionalOptions, With, WithFill,
};
/// Given an iterator of spans, return the [Span::union] of all spans.
@@ -1262,6 +1262,9 @@ impl Spanned for Expr {
Expr::Identifier(ident) => ident.span,
Expr::CompoundIdentifier(vec) => union_spans(vec.iter().map(|i|
i.span)),
Expr::CompositeAccess { expr, key } =>
expr.span().union(&key.span),
+ Expr::CompoundFieldAccess { root, access_chain } => {
+
union_spans(iter::once(root.span()).chain(access_chain.iter().map(|i|
i.span())))
+ }
Expr::IsFalse(expr) => expr.span(),
Expr::IsNotFalse(expr) => expr.span(),
Expr::IsTrue(expr) => expr.span(),
@@ -1336,9 +1339,6 @@ impl Spanned for Expr {
Expr::Nested(expr) => expr.span(),
Expr::Value(value) => value.span(),
Expr::TypedString { .. } => Span::empty(),
- Expr::MapAccess { column, keys } => column
- .span()
- .union(&union_spans(keys.iter().map(|i| i.key.span()))),
Expr::Function(function) => function.span(),
Expr::GroupingSets(vec) => {
union_spans(vec.iter().flat_map(|i| i.iter().map(|k|
k.span())))
@@ -1434,7 +1434,6 @@ impl Spanned for Expr {
Expr::Named { .. } => Span::empty(),
Expr::Dictionary(_) => Span::empty(),
Expr::Map(_) => Span::empty(),
- Expr::Subscript { expr, subscript } =>
expr.span().union(&subscript.span()),
Expr::Interval(interval) => interval.value.span(),
Expr::Wildcard(token) => token.0.span,
Expr::QualifiedWildcard(object_name, token) => union_spans(
@@ -1473,6 +1472,15 @@ impl Spanned for Subscript {
}
}
+impl Spanned for AccessExpr {
+ fn span(&self) -> Span {
+ match self {
+ AccessExpr::Dot(ident) => ident.span(),
+ AccessExpr::Subscript(subscript) => subscript.span(),
+ }
+ }
+}
+
impl Spanned for ObjectName {
fn span(&self) -> Span {
let ObjectName(segments) = self;
diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs
index 50e383db..045e5062 100644
--- a/src/dialect/snowflake.rs
+++ b/src/dialect/snowflake.rs
@@ -234,6 +234,10 @@ impl Dialect for SnowflakeDialect {
RESERVED_FOR_IDENTIFIER.contains(&kw)
}
}
+
+ fn supports_partiql(&self) -> bool {
+ true
+ }
}
/// Parse snowflake create table statement.
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 5ee8ae21..af4b7b45 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1161,53 +1161,39 @@ impl<'a> Parser<'a> {
w_span: Span,
) -> Result<Expr, ParserError> {
match self.peek_token().token {
- Token::LParen | Token::Period => {
- let mut id_parts: Vec<Ident> = vec![w.to_ident(w_span)];
- let mut ending_wildcard: Option<TokenWithSpan> = None;
- while self.consume_token(&Token::Period) {
- let next_token = self.next_token();
- match next_token.token {
- Token::Word(w) =>
id_parts.push(w.to_ident(next_token.span)),
- Token::Mul => {
- // Postgres explicitly allows funcnm(tablenm.*)
and the
- // function array_agg traverses this control flow
- if dialect_of!(self is PostgreSqlDialect) {
- ending_wildcard = Some(next_token);
- break;
- } else {
- return self.expected("an identifier after
'.'", next_token);
- }
- }
- Token::SingleQuotedString(s) =>
id_parts.push(Ident::with_quote('\'', s)),
- _ => {
- return self.expected("an identifier or a '*' after
'.'", next_token);
- }
- }
- }
-
- if let Some(wildcard_token) = ending_wildcard {
- Ok(Expr::QualifiedWildcard(
- ObjectName(id_parts),
- AttachedToken(wildcard_token),
- ))
- } else if self.consume_token(&Token::LParen) {
- if dialect_of!(self is SnowflakeDialect | MsSqlDialect)
- && self.consume_tokens(&[Token::Plus, Token::RParen])
- {
- Ok(Expr::OuterJoin(Box::new(
- match <[Ident; 1]>::try_from(id_parts) {
- Ok([ident]) => Expr::Identifier(ident),
- Err(parts) => Expr::CompoundIdentifier(parts),
- },
- )))
- } else {
- self.prev_token();
- self.parse_function(ObjectName(id_parts))
- }
+ Token::Period => {
+
self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![])
+ }
+ Token::LParen => {
+ let id_parts = vec![w.to_ident(w_span)];
+ if let Some(expr) = self.parse_outer_join_expr(&id_parts) {
+ Ok(expr)
} else {
- Ok(Expr::CompoundIdentifier(id_parts))
+ let mut expr = self.parse_function(ObjectName(id_parts))?;
+ // consume all periods if it's a method chain
+ expr = self.try_parse_method(expr)?;
+ let fields = vec![];
+ self.parse_compound_field_access(expr, fields)
}
}
+ Token::LBracket if dialect_of!(self is PostgreSqlDialect |
DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) =>
+ {
+ let ident = Expr::Identifier(w.to_ident(w_span));
+ let mut fields = vec![];
+ self.parse_multi_dim_subscript(&mut fields)?;
+ self.parse_compound_field_access(ident, fields)
+ }
+ // string introducer
https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
+ Token::SingleQuotedString(_)
+ | Token::DoubleQuotedString(_)
+ | Token::HexStringLiteral(_)
+ if w.value.starts_with('_') =>
+ {
+ Ok(Expr::IntroducedString {
+ introducer: w.value.clone(),
+ value: self.parse_introduced_string_value()?,
+ })
+ }
// string introducer
https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html
Token::SingleQuotedString(_)
| Token::DoubleQuotedString(_)
@@ -1426,6 +1412,144 @@ impl<'a> Parser<'a> {
}
}
+ /// Try to parse an [Expr::CompoundFieldAccess] like `a.b.c` or `a.b[1].c`.
+ /// If the root and all the fields are `Expr::Identifier`s, return an
[Expr::CompoundIdentifier] instead.
+ /// If only the root exists, return the root.
+ /// If the dialect supports [Dialect::supports_partiql], parsing stops at a
[Token::LBracket] following a dotted field and leaves it for JsonAccess parsing.
+ pub fn parse_compound_field_access(
+ &mut self,
+ root: Expr,
+ mut chain: Vec<AccessExpr>,
+ ) -> Result<Expr, ParserError> {
+ let mut ending_wildcard: Option<TokenWithSpan> = None;
+ let mut ending_lbracket = false;
+ while self.consume_token(&Token::Period) {
+ let next_token = self.next_token();
+ match next_token.token {
+ Token::Word(w) => {
+ let expr = Expr::Identifier(w.to_ident(next_token.span));
+ chain.push(AccessExpr::Dot(expr));
+ if self.peek_token().token == Token::LBracket {
+ if self.dialect.supports_partiql() {
+ self.next_token();
+ ending_lbracket = true;
+ break;
+ } else {
+ self.parse_multi_dim_subscript(&mut chain)?
+ }
+ }
+ }
+ Token::Mul => {
+ // Postgres explicitly allows funcnm(tablenm.*) and the
+ // function array_agg traverses this control flow
+ if dialect_of!(self is PostgreSqlDialect) {
+ ending_wildcard = Some(next_token);
+ break;
+ } else {
+ return self.expected("an identifier after '.'",
next_token);
+ }
+ }
+ Token::SingleQuotedString(s) => {
+ let expr = Expr::Identifier(Ident::with_quote('\'', s));
+ chain.push(AccessExpr::Dot(expr));
+ }
+ _ => {
+ return self.expected("an identifier or a '*' after '.'",
next_token);
+ }
+ }
+ }
+
+ // if dialect supports partiql, we need to go back one Token::LBracket
for the JsonAccess parsing
+ if self.dialect.supports_partiql() && ending_lbracket {
+ self.prev_token();
+ }
+
+ if let Some(wildcard_token) = ending_wildcard {
+ if !Self::is_all_ident(&root, &chain) {
+ return self.expected("an identifier or a '*' after '.'",
self.peek_token());
+ };
+ Ok(Expr::QualifiedWildcard(
+ ObjectName(Self::exprs_to_idents(root, chain)?),
+ AttachedToken(wildcard_token),
+ ))
+ } else if self.peek_token().token == Token::LParen {
+ if !Self::is_all_ident(&root, &chain) {
+ // consume LParen
+ self.next_token();
+ return self.expected("an identifier or a '*' after '.'",
self.peek_token());
+ };
+ let id_parts = Self::exprs_to_idents(root, chain)?;
+ if let Some(expr) = self.parse_outer_join_expr(&id_parts) {
+ Ok(expr)
+ } else {
+ self.parse_function(ObjectName(id_parts))
+ }
+ } else {
+ if Self::is_all_ident(&root, &chain) {
+ return Ok(Expr::CompoundIdentifier(Self::exprs_to_idents(
+ root, chain,
+ )?));
+ }
+ if chain.is_empty() {
+ return Ok(root);
+ }
+ Ok(Expr::CompoundFieldAccess {
+ root: Box::new(root),
+ access_chain: chain.clone(),
+ })
+ }
+ }
+
+ /// Check if the root is an identifier and all fields are identifiers.
+ fn is_all_ident(root: &Expr, fields: &[AccessExpr]) -> bool {
+ if !matches!(root, Expr::Identifier(_)) {
+ return false;
+ }
+ fields
+ .iter()
+ .all(|x| matches!(x, AccessExpr::Dot(Expr::Identifier(_))))
+ }
+
+ /// Convert a root and a list of fields to a list of identifiers.
+ fn exprs_to_idents(root: Expr, fields: Vec<AccessExpr>) ->
Result<Vec<Ident>, ParserError> {
+ let mut idents = vec![];
+ if let Expr::Identifier(root) = root {
+ idents.push(root);
+ for x in fields {
+ if let AccessExpr::Dot(Expr::Identifier(ident)) = x {
+ idents.push(ident);
+ } else {
+ return parser_err!(
+ format!("Expected identifier, found: {}", x),
+ x.span().start
+ );
+ }
+ }
+ Ok(idents)
+ } else {
+ parser_err!(
+ format!("Expected identifier, found: {}", root),
+ root.span().start
+ )
+ }
+ }
+
+ /// Try to parse OuterJoin expression `(+)`
+ fn parse_outer_join_expr(&mut self, id_parts: &[Ident]) -> Option<Expr> {
+ if dialect_of!(self is SnowflakeDialect | MsSqlDialect)
+ && self.consume_tokens(&[Token::LParen, Token::Plus,
Token::RParen])
+ {
+ Some(Expr::OuterJoin(Box::new(
+ match <[Ident; 1]>::try_from(id_parts.to_vec()) {
+ Ok([ident]) => Expr::Identifier(ident),
+ Err(parts) => Expr::CompoundIdentifier(parts),
+ },
+ )))
+ } else {
+ None
+ }
+ }
+
pub fn parse_utility_options(&mut self) -> Result<Vec<UtilityOption>,
ParserError> {
self.expect_token(&Token::LParen)?;
let options = self.parse_comma_separated(Self::parse_utility_option)?;
@@ -3042,13 +3166,18 @@ impl<'a> Parser<'a> {
expr: Box::new(expr),
})
} else if Token::LBracket == tok {
- if dialect_of!(self is PostgreSqlDialect | DuckDbDialect |
GenericDialect) {
- self.parse_subscript(expr)
- } else if dialect_of!(self is SnowflakeDialect) ||
self.dialect.supports_partiql() {
+ if dialect_of!(self is PostgreSqlDialect | DuckDbDialect |
GenericDialect | ClickHouseDialect | BigQueryDialect)
+ {
+ let mut chain = vec![];
+ // back to LBracket
+ self.prev_token();
+ self.parse_multi_dim_subscript(&mut chain)?;
+ self.parse_compound_field_access(expr, chain)
+ } else if self.dialect.supports_partiql() {
self.prev_token();
self.parse_json_access(expr)
} else {
- self.parse_map_access(expr)
+ parser_err!("Array subscripting is not supported",
tok.span.start)
}
} else if dialect_of!(self is SnowflakeDialect | GenericDialect) &&
Token::Colon == tok {
self.prev_token();
@@ -3144,15 +3273,24 @@ impl<'a> Parser<'a> {
})
}
+ /// Parse a multi-dimensional array access like `[1:3][1][1]`
+ pub fn parse_multi_dim_subscript(
+ &mut self,
+ chain: &mut Vec<AccessExpr>,
+ ) -> Result<(), ParserError> {
+ while self.consume_token(&Token::LBracket) {
+ self.parse_subscript(chain)?;
+ }
+ Ok(())
+ }
+
/// Parses an array subscript like `[1:3]`
///
/// Parser is right after `[`
- pub fn parse_subscript(&mut self, expr: Expr) -> Result<Expr, ParserError>
{
+ fn parse_subscript(&mut self, chain: &mut Vec<AccessExpr>) -> Result<(),
ParserError> {
let subscript = self.parse_subscript_inner()?;
- Ok(Expr::Subscript {
- expr: Box::new(expr),
- subscript: Box::new(subscript),
- })
+ chain.push(AccessExpr::Subscript(subscript));
+ Ok(())
}
fn parse_json_path_object_key(&mut self) -> Result<JsonPathElem,
ParserError> {
@@ -3214,46 +3352,6 @@ impl<'a> Parser<'a> {
Ok(JsonPath { path })
}
- pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr,
ParserError> {
- let key = self.parse_expr()?;
- self.expect_token(&Token::RBracket)?;
-
- let mut keys = vec![MapAccessKey {
- key,
- syntax: MapAccessSyntax::Bracket,
- }];
- loop {
- let key = match self.peek_token().token {
- Token::LBracket => {
- self.next_token(); // consume `[`
- let key = self.parse_expr()?;
- self.expect_token(&Token::RBracket)?;
- MapAccessKey {
- key,
- syntax: MapAccessSyntax::Bracket,
- }
- }
- // Access on BigQuery nested and repeated expressions can
- // mix notations in the same expression.
- //
https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns
- Token::Period if dialect_of!(self is BigQueryDialect) => {
- self.next_token(); // consume `.`
- MapAccessKey {
- key: self.parse_expr()?,
- syntax: MapAccessSyntax::Period,
- }
- }
- _ => break,
- };
- keys.push(key);
- }
-
- Ok(Expr::MapAccess {
- column: Box::new(expr),
- keys,
- })
- }
-
/// Parses the parens following the `[ NOT ] IN` operator.
pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result<Expr,
ParserError> {
// BigQuery allows `IN UNNEST(array_expression)`
diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
index be383b47..9dfabc01 100644
--- a/tests/sqlparser_bigquery.rs
+++ b/tests/sqlparser_bigquery.rs
@@ -23,7 +23,7 @@ use std::ops::Deref;
use sqlparser::ast::*;
use sqlparser::dialect::{BigQueryDialect, GenericDialect};
use sqlparser::parser::{ParserError, ParserOptions};
-use sqlparser::tokenizer::Span;
+use sqlparser::tokenizer::{Location, Span};
use test_utils::*;
#[test]
@@ -1965,27 +1965,47 @@ fn parse_map_access_expr() {
let sql = "users[-1][safe_offset(2)].a.b";
let expr = bigquery().verified_expr(sql);
- fn map_access_key(key: Expr, syntax: MapAccessSyntax) -> MapAccessKey {
- MapAccessKey { key, syntax }
- }
- let expected = Expr::MapAccess {
- column: Expr::Identifier(Ident::new("users")).into(),
- keys: vec![
- map_access_key(
- Expr::UnaryOp {
+ let expected = Expr::CompoundFieldAccess {
+ root: Box::new(Expr::Identifier(Ident::with_span(
+ Span::new(Location::of(1, 1), Location::of(1, 6)),
+ "users",
+ ))),
+ access_chain: vec![
+ AccessExpr::Subscript(Subscript::Index {
+ index: Expr::UnaryOp {
op: UnaryOperator::Minus,
expr: Expr::Value(number("1")).into(),
},
- MapAccessSyntax::Bracket,
- ),
- map_access_key(
- call("safe_offset", [Expr::Value(number("2"))]),
- MapAccessSyntax::Bracket,
- ),
- map_access_key(
- Expr::CompoundIdentifier(vec![Ident::new("a"),
Ident::new("b")]),
- MapAccessSyntax::Period,
- ),
+ }),
+ AccessExpr::Subscript(Subscript::Index {
+ index: Expr::Function(Function {
+ name: ObjectName(vec![Ident::with_span(
+ Span::new(Location::of(1, 11), Location::of(1, 22)),
+ "safe_offset",
+ )]),
+ parameters: FunctionArguments::None,
+ args: FunctionArguments::List(FunctionArgumentList {
+ duplicate_treatment: None,
+ args:
vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
+ number("2"),
+ )))],
+ clauses: vec![],
+ }),
+ filter: None,
+ null_treatment: None,
+ over: None,
+ within_group: vec![],
+ uses_odbc_syntax: false,
+ }),
+ }),
+ AccessExpr::Dot(Expr::Identifier(Ident::with_span(
+ Span::new(Location::of(1, 24), Location::of(1, 25)),
+ "a",
+ ))),
+ AccessExpr::Dot(Expr::Identifier(Ident::with_span(
+ Span::new(Location::of(1, 26), Location::of(1, 27)),
+ "b",
+ ))),
],
};
assert_eq!(expr, expected);
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index d60506d9..2f1b043b 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -25,7 +25,7 @@ use helpers::attached_token::AttachedToken;
use sqlparser::tokenizer::Span;
use test_utils::*;
-use sqlparser::ast::Expr::{BinaryOp, Identifier, MapAccess};
+use sqlparser::ast::Expr::{BinaryOp, Identifier};
use sqlparser::ast::SelectItem::UnnamedExpr;
use sqlparser::ast::TableFactor::Table;
use sqlparser::ast::Value::Number;
@@ -44,22 +44,21 @@ fn parse_map_access_expr() {
select_token: AttachedToken::empty(),
top: None,
top_before_distinct: false,
- projection: vec![UnnamedExpr(MapAccess {
- column: Box::new(Identifier(Ident {
+ projection: vec![UnnamedExpr(Expr::CompoundFieldAccess {
+ root: Box::new(Identifier(Ident {
value: "string_values".to_string(),
quote_style: None,
span: Span::empty(),
})),
- keys: vec![MapAccessKey {
- key: call(
+ access_chain: vec![AccessExpr::Subscript(Subscript::Index {
+ index: call(
"indexOf",
[
Expr::Identifier(Ident::new("string_names")),
Expr::Value(Value::SingleQuotedString("endpoint".to_string()))
]
),
- syntax: MapAccessSyntax::Bracket
- }],
+ })],
})],
into: None,
from: vec![TableWithJoins {
@@ -76,18 +75,17 @@ fn parse_map_access_expr() {
}),
op: BinaryOperator::And,
right: Box::new(BinaryOp {
- left: Box::new(MapAccess {
- column:
Box::new(Identifier(Ident::new("string_value"))),
- keys: vec![MapAccessKey {
- key: call(
+ left: Box::new(Expr::CompoundFieldAccess {
+ root: Box::new(Identifier(Ident::new("string_value"))),
+ access_chain:
vec![AccessExpr::Subscript(Subscript::Index {
+ index: call(
"indexOf",
[
Expr::Identifier(Ident::new("string_name")),
Expr::Value(Value::SingleQuotedString("app".to_string()))
]
),
- syntax: MapAccessSyntax::Bracket
- }],
+ })],
}),
op: BinaryOperator::NotEq,
right:
Box::new(Expr::Value(Value::SingleQuotedString("foo".to_string()))),
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 8cc161f1..c294eab0 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -37,8 +37,8 @@ use sqlparser::dialect::{
};
use sqlparser::keywords::{Keyword, ALL_KEYWORDS};
use sqlparser::parser::{Parser, ParserError, ParserOptions};
-use sqlparser::tokenizer::Span;
use sqlparser::tokenizer::Tokenizer;
+use sqlparser::tokenizer::{Location, Span};
use test_utils::{
all_dialects, all_dialects_where, alter_table_op, assert_eq_vec, call,
expr_from_projection,
join, number, only, table, table_alias, table_from_name, TestedDialects,
@@ -2939,6 +2939,31 @@ fn parse_window_function_null_treatment_arg() {
);
}
+#[test]
+fn test_compound_expr() {
+ let supported_dialects = TestedDialects::new(vec![
+ Box::new(GenericDialect {}),
+ Box::new(DuckDbDialect {}),
+ Box::new(BigQueryDialect {}),
+ ]);
+ let sqls = [
+ "SELECT abc[1].f1 FROM t",
+ "SELECT abc[1].f1.f2 FROM t",
+ "SELECT f1.abc[1] FROM t",
+ "SELECT f1.f2.abc[1] FROM t",
+ "SELECT f1.abc[1].f2 FROM t",
+ "SELECT named_struct('a', 1, 'b', 2).a",
+ "SELECT named_struct('a', 1, 'b', 2).a",
+ "SELECT make_array(1, 2, 3)[1]",
+ "SELECT make_array(named_struct('a', 1))[1].a",
+ "SELECT abc[1][-1].a.b FROM t",
+ "SELECT abc[1][-1].a.b[1] FROM t",
+ ];
+ for sql in sqls {
+ supported_dialects.verified_stmt(sql);
+ }
+}
+
#[test]
fn parse_negative_value() {
let sql1 = "SELECT -1";
@@ -10174,20 +10199,39 @@ fn parse_map_access_expr() {
Box::new(ClickHouseDialect {}),
]);
let expr = dialects.verified_expr(sql);
- let expected = Expr::MapAccess {
- column: Expr::Identifier(Ident::new("users")).into(),
- keys: vec![
- MapAccessKey {
- key: Expr::UnaryOp {
+ let expected = Expr::CompoundFieldAccess {
+ root: Box::new(Expr::Identifier(Ident::with_span(
+ Span::new(Location::of(1, 1), Location::of(1, 6)),
+ "users",
+ ))),
+ access_chain: vec![
+ AccessExpr::Subscript(Subscript::Index {
+ index: Expr::UnaryOp {
op: UnaryOperator::Minus,
expr: Expr::Value(number("1")).into(),
},
- syntax: MapAccessSyntax::Bracket,
- },
- MapAccessKey {
- key: call("safe_offset", [Expr::Value(number("2"))]),
- syntax: MapAccessSyntax::Bracket,
- },
+ }),
+ AccessExpr::Subscript(Subscript::Index {
+ index: Expr::Function(Function {
+ name: ObjectName(vec![Ident::with_span(
+ Span::new(Location::of(1, 11), Location::of(1, 22)),
+ "safe_offset",
+ )]),
+ parameters: FunctionArguments::None,
+ args: FunctionArguments::List(FunctionArgumentList {
+ duplicate_treatment: None,
+ args:
vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Value(
+ number("2"),
+ )))],
+ clauses: vec![],
+ }),
+ filter: None,
+ null_treatment: None,
+ over: None,
+ within_group: vec![],
+ uses_odbc_syntax: false,
+ }),
+ }),
],
};
assert_eq!(expr, expected);
@@ -10977,8 +11021,8 @@ fn test_map_syntax() {
check(
"MAP {'a': 10, 'b': 20}['a']",
- Expr::Subscript {
- expr: Box::new(Expr::Map(Map {
+ Expr::CompoundFieldAccess {
+ root: Box::new(Expr::Map(Map {
entries: vec![
MapEntry {
key:
Box::new(Expr::Value(Value::SingleQuotedString("a".to_owned()))),
@@ -10990,9 +11034,9 @@ fn test_map_syntax() {
},
],
})),
- subscript: Box::new(Subscript::Index {
+ access_chain: vec![AccessExpr::Subscript(Subscript::Index {
index: Expr::Value(Value::SingleQuotedString("a".to_owned())),
- }),
+ })],
},
);
diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs
index d441cd19..db4ffb6f 100644
--- a/tests/sqlparser_duckdb.rs
+++ b/tests/sqlparser_duckdb.rs
@@ -630,8 +630,8 @@ fn test_array_index() {
_ => panic!("Expected an expression with alias"),
};
assert_eq!(
- &Expr::Subscript {
- expr: Box::new(Expr::Array(Array {
+ &Expr::CompoundFieldAccess {
+ root: Box::new(Expr::Array(Array {
elem: vec![
Expr::Value(Value::SingleQuotedString("a".to_owned())),
Expr::Value(Value::SingleQuotedString("b".to_owned())),
@@ -639,9 +639,9 @@ fn test_array_index() {
],
named: false
})),
- subscript: Box::new(Subscript::Index {
+ access_chain: vec![AccessExpr::Subscript(Subscript::Index {
index: Expr::Value(number("3"))
- })
+ })]
},
expr
);
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index aaf4e65d..557e70bf 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -2095,11 +2095,11 @@ fn parse_array_index_expr() {
let sql = "SELECT foo[0] FROM foos";
let select = pg_and_generic().verified_only_select(sql);
assert_eq!(
- &Expr::Subscript {
- expr: Box::new(Expr::Identifier(Ident::new("foo"))),
- subscript: Box::new(Subscript::Index {
+ &Expr::CompoundFieldAccess {
+ root: Box::new(Expr::Identifier(Ident::new("foo"))),
+ access_chain: vec![AccessExpr::Subscript(Subscript::Index {
index: num[0].clone()
- }),
+ })],
},
expr_from_projection(only(&select.projection)),
);
@@ -2107,16 +2107,16 @@ fn parse_array_index_expr() {
let sql = "SELECT foo[0][0] FROM foos";
let select = pg_and_generic().verified_only_select(sql);
assert_eq!(
- &Expr::Subscript {
- expr: Box::new(Expr::Subscript {
- expr: Box::new(Expr::Identifier(Ident::new("foo"))),
- subscript: Box::new(Subscript::Index {
+ &Expr::CompoundFieldAccess {
+ root: Box::new(Expr::Identifier(Ident::new("foo"))),
+ access_chain: vec![
+ AccessExpr::Subscript(Subscript::Index {
index: num[0].clone()
}),
- }),
- subscript: Box::new(Subscript::Index {
- index: num[0].clone()
- }),
+ AccessExpr::Subscript(Subscript::Index {
+ index: num[0].clone()
+ })
+ ],
},
expr_from_projection(only(&select.projection)),
);
@@ -2124,29 +2124,27 @@ fn parse_array_index_expr() {
let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#;
let select = pg_and_generic().verified_only_select(sql);
assert_eq!(
- &Expr::Subscript {
- expr: Box::new(Expr::Subscript {
- expr: Box::new(Expr::Subscript {
- expr: Box::new(Expr::Identifier(Ident::new("bar"))),
- subscript: Box::new(Subscript::Index {
- index: num[0].clone()
- })
+ &Expr::CompoundFieldAccess {
+ root: Box::new(Expr::Identifier(Ident::new("bar"))),
+ access_chain: vec![
+ AccessExpr::Subscript(Subscript::Index {
+ index: num[0].clone()
}),
- subscript: Box::new(Subscript::Index {
+ AccessExpr::Subscript(Subscript::Index {
index: Expr::Identifier(Ident {
value: "baz".to_string(),
quote_style: Some('"'),
span: Span::empty(),
})
- })
- }),
- subscript: Box::new(Subscript::Index {
- index: Expr::Identifier(Ident {
- value: "fooz".to_string(),
- quote_style: Some('"'),
- span: Span::empty(),
- })
- })
+ }),
+ AccessExpr::Subscript(Subscript::Index {
+ index: Expr::Identifier(Ident {
+ value: "fooz".to_string(),
+ quote_style: Some('"'),
+ span: Span::empty(),
+ })
+ }),
+ ],
},
expr_from_projection(only(&select.projection)),
);
@@ -2154,33 +2152,33 @@ fn parse_array_index_expr() {
let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]";
let select = pg_and_generic().verified_only_select(sql);
assert_eq!(
- &Expr::Subscript {
- expr: Box::new(Expr::Subscript {
- expr: Box::new(Expr::Nested(Box::new(Expr::Cast {
- kind: CastKind::Cast,
- expr: Box::new(Expr::Array(Array {
- elem: vec![Expr::Array(Array {
- elem: vec![num[2].clone(), num[3].clone(),],
- named: true,
- })],
+ &Expr::CompoundFieldAccess {
+ root: Box::new(Expr::Nested(Box::new(Expr::Cast {
+ kind: CastKind::Cast,
+ expr: Box::new(Expr::Array(Array {
+ elem: vec![Expr::Array(Array {
+ elem: vec![num[2].clone(), num[3].clone(),],
named: true,
- })),
- data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
-
Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket(
- Box::new(DataType::Int(None)),
- None
- ))),
+ })],
+ named: true,
+ })),
+ data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
+ Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket(
+ Box::new(DataType::Int(None)),
None
- )),
- format: None,
- }))),
- subscript: Box::new(Subscript::Index {
+ ))),
+ None
+ )),
+ format: None,
+ }))),
+ access_chain: vec![
+ AccessExpr::Subscript(Subscript::Index {
index: num[1].clone()
}),
- }),
- subscript: Box::new(Subscript::Index {
- index: num[2].clone()
- }),
+ AccessExpr::Subscript(Subscript::Index {
+ index: num[2].clone()
+ }),
+ ],
},
expr_from_projection(only(&select.projection)),
);
@@ -2269,9 +2267,13 @@ fn parse_array_subscript() {
),
];
for (sql, expect) in tests {
- let Expr::Subscript { subscript, .. } =
pg_and_generic().verified_expr(sql) else {
+ let Expr::CompoundFieldAccess { access_chain, .. } =
pg_and_generic().verified_expr(sql)
+ else {
panic!("expected subscript expr");
};
+ let Some(AccessExpr::Subscript(subscript)) = access_chain.last() else {
+ panic!("expected subscript");
+ };
assert_eq!(expect, *subscript);
}
@@ -2282,25 +2284,25 @@ fn parse_array_subscript() {
fn parse_array_multi_subscript() {
let expr = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]");
assert_eq!(
- Expr::Subscript {
- expr: Box::new(Expr::Subscript {
- expr: Box::new(call(
- "make_array",
- vec![
- Expr::Value(number("1")),
- Expr::Value(number("2")),
- Expr::Value(number("3"))
- ]
- )),
- subscript: Box::new(Subscript::Slice {
+ Expr::CompoundFieldAccess {
+ root: Box::new(call(
+ "make_array",
+ vec![
+ Expr::Value(number("1")),
+ Expr::Value(number("2")),
+ Expr::Value(number("3"))
+ ]
+ )),
+ access_chain: vec![
+ AccessExpr::Subscript(Subscript::Slice {
lower_bound: Some(Expr::Value(number("1"))),
upper_bound: Some(Expr::Value(number("2"))),
stride: None,
}),
- }),
- subscript: Box::new(Subscript::Index {
- index: Expr::Value(number("2")),
- }),
+ AccessExpr::Subscript(Subscript::Index {
+ index: Expr::Value(number("2")),
+ }),
+ ],
},
expr,
);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]