This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 46f2234c GenericDialect: support colon operator for JsonAccess (#2124)
46f2234c is described below
commit 46f2234c1596d8763db3955ec50d6343ca2f77a5
Author: Samyak Sarnayak <[email protected]>
AuthorDate: Fri Jan 16 16:21:08 2026 +0530
GenericDialect: support colon operator for JsonAccess (#2124)
---
src/dialect/mod.rs | 10 +++
src/dialect/mssql.rs | 9 +++
src/dialect/postgresql.rs | 3 +
src/parser/mod.rs | 8 ++-
tests/sqlparser_common.rs | 145 +++++++++++++++++++++++++++++++++++++++++++
tests/sqlparser_snowflake.rs | 115 +---------------------------------
6 files changed, 174 insertions(+), 116 deletions(-)
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 873108ee..d1728566 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any {
Token::DoubleColon | Token::ExclamationMark | Token::LBracket |
Token::CaretAt => {
Ok(p!(DoubleColon))
}
+ Token::Colon => match parser.peek_nth_token(1).token {
+ // When colon is followed by a string or a number, it's usually in MAP syntax.
+ Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()),
+ // In other cases, it's used in semi-structured data traversal like in variant or JSON
+ // string columns. See `JsonAccess`.
+ _ => Ok(p!(Colon)),
+ },
Token::Arrow
| Token::LongArrow
| Token::HashArrow
@@ -812,6 +819,7 @@ pub trait Dialect: Debug + Any {
Precedence::Ampersand => 23,
Precedence::Caret => 22,
Precedence::Pipe => 21,
+ Precedence::Colon => 21,
Precedence::Between => 20,
Precedence::Eq => 20,
Precedence::Like => 19,
@@ -1269,6 +1277,8 @@ pub enum Precedence {
Caret,
/// Bitwise `OR` / pipe operator (`|`).
Pipe,
+ /// `:` operator for json/variant access.
+ Colon,
/// `BETWEEN` operator.
Between,
/// Equality operator (`=`).
diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs
index faf3402c..a2854525 100644
--- a/src/dialect/mssql.rs
+++ b/src/dialect/mssql.rs
@@ -160,6 +160,15 @@ impl Dialect for MsSqlDialect {
None
}
}
+
+ fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
+ let token = parser.peek_token();
+ match token.token {
+ // lowest prec to prevent it from turning into a binary op
+ Token::Colon => Some(Ok(self.prec_unknown())),
+ _ => None,
+ }
+ }
}
impl MsSqlDialect {
diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index 02bab0e0..7c9e7db8 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect {
| Token::ShiftRight
| Token::ShiftLeft
| Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
+ // lowest prec to prevent it from turning into a binary op
+ Token::Colon => Some(Ok(self.prec_unknown())),
_ => None,
}
}
@@ -159,6 +161,7 @@ impl Dialect for PostgreSqlDialect {
Precedence::Ampersand => PG_OTHER_PREC,
Precedence::Caret => CARET_PREC,
Precedence::Pipe => PG_OTHER_PREC,
+ Precedence::Colon => PG_OTHER_PREC,
Precedence::Between => BETWEEN_LIKE_PREC,
Precedence::Eq => EQ_PREC,
Precedence::Like => BETWEEN_LIKE_PREC,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 47bb1164..6fd7b5ca 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -3918,7 +3918,7 @@ impl<'a> Parser<'a> {
expr: Box::new(expr),
})
} else if Token::LBracket == *tok && self.dialect.supports_partiql()
- || (dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == *tok)
+ || (Token::Colon == *tok)
{
self.prev_token();
self.parse_json_access(expr)
@@ -3954,7 +3954,8 @@ impl<'a> Parser<'a> {
let lower_bound = if self.consume_token(&Token::Colon) {
None
} else {
- Some(self.parse_expr()?)
+ // parse expr until we hit a colon (or any token with lower precedence)
+ Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
};
// check for end
@@ -3982,7 +3983,8 @@ impl<'a> Parser<'a> {
stride: None,
});
} else {
- Some(self.parse_expr()?)
+ // parse expr until we hit a colon (or any token with lower precedence)
+ Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
};
// check for end
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 95ad9a20..bbbf0d83 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -18067,3 +18067,148 @@ fn test_binary_kw_as_cast() {
all_dialects_where(|d| d.supports_binary_kw_as_cast())
.one_statement_parses_to("SELECT BINARY 1+1", "SELECT CAST(1 + 1 AS BINARY)");
}
+
+#[test]
+fn parse_semi_structured_data_traversal() {
+ let dialects = TestedDialects::new(vec![
+ Box::new(GenericDialect {}),
+ Box::new(SnowflakeDialect {}),
+ Box::new(DatabricksDialect {}),
+ ]);
+
+ // most basic case
+ let sql = "SELECT a:b FROM t";
+ let select = dialects.verified_only_select(sql);
+ assert_eq!(
+ SelectItem::UnnamedExpr(Expr::JsonAccess {
+ value: Box::new(Expr::Identifier(Ident::new("a"))),
+ path: JsonPath {
+ path: vec![JsonPathElem::Dot {
+ key: "b".to_owned(),
+ quoted: false
+ }]
+ },
+ }),
+ select.projection[0]
+ );
+
+ // identifier can be quoted
+ let sql = r#"SELECT a:"my long object key name" FROM t"#;
+ let select = dialects.verified_only_select(sql);
+ assert_eq!(
+ SelectItem::UnnamedExpr(Expr::JsonAccess {
+ value: Box::new(Expr::Identifier(Ident::new("a"))),
+ path: JsonPath {
+ path: vec![JsonPathElem::Dot {
+ key: "my long object key name".to_owned(),
+ quoted: true
+ }]
+ },
+ }),
+ select.projection[0]
+ );
+
+ dialects.verified_stmt("SELECT a:b::INT FROM t");
+
+ // unquoted keywords are permitted in the object key
+ let sql = "SELECT a:select, a:from FROM t";
+ let select = dialects.verified_only_select(sql);
+ assert_eq!(
+ vec![
+ SelectItem::UnnamedExpr(Expr::JsonAccess {
+ value: Box::new(Expr::Identifier(Ident::new("a"))),
+ path: JsonPath {
+ path: vec![JsonPathElem::Dot {
+ key: "select".to_owned(),
+ quoted: false
+ }]
+ },
+ }),
+ SelectItem::UnnamedExpr(Expr::JsonAccess {
+ value: Box::new(Expr::Identifier(Ident::new("a"))),
+ path: JsonPath {
+ path: vec![JsonPathElem::Dot {
+ key: "from".to_owned(),
+ quoted: false
+ }]
+ },
+ })
+ ],
+ select.projection
+ );
+
+ // multiple levels can be traversed
+ // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
+ let sql = r#"SELECT a:foo."bar".baz"#;
+ let select = dialects.verified_only_select(sql);
+ assert_eq!(
+ vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
+ value: Box::new(Expr::Identifier(Ident::new("a"))),
+ path: JsonPath {
+ path: vec![
+ JsonPathElem::Dot {
+ key: "foo".to_owned(),
+ quoted: false,
+ },
+ JsonPathElem::Dot {
+ key: "bar".to_owned(),
+ quoted: true,
+ },
+ JsonPathElem::Dot {
+ key: "baz".to_owned(),
+ quoted: false,
+ }
+ ]
+ },
+ })],
+ select.projection
+ );
+
+ // dot and bracket notation can be mixed (starting with : case)
+ // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
+ let sql = r#"SELECT a:foo[0].bar"#;
+ let select = dialects.verified_only_select(sql);
+ assert_eq!(
+ vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
+ value: Box::new(Expr::Identifier(Ident::new("a"))),
+ path: JsonPath {
+ path: vec![
+ JsonPathElem::Dot {
+ key: "foo".to_owned(),
+ quoted: false,
+ },
+ JsonPathElem::Bracket {
+ key: Expr::value(number("0")),
+ },
+ JsonPathElem::Dot {
+ key: "bar".to_owned(),
+ quoted: false,
+ }
+ ]
+ },
+ })],
+ select.projection
+ );
+}
+
+#[test]
+fn parse_array_subscript() {
+ let dialects = all_dialects_except(|d| {
+ d.is::<MsSqlDialect>()
+ || d.is::<SnowflakeDialect>()
+ || d.is::<SQLiteDialect>()
+ || d.is::<RedshiftSqlDialect>()
+ });
+
+ dialects.verified_stmt("SELECT arr[1]");
+ dialects.verified_stmt("SELECT arr[:]");
+ dialects.verified_stmt("SELECT arr[1:2]");
+ dialects.verified_stmt("SELECT arr[1:2:4]");
+ dialects.verified_stmt("SELECT arr[1:array_length(arr)]");
+ dialects.verified_stmt("SELECT arr[array_length(arr) - 1:array_length(arr)]");
+ dialects
+ .verified_stmt("SELECT arr[array_length(arr) - 2:array_length(arr) - 1:array_length(arr)]");
+
+ dialects.verified_stmt("SELECT arr[1][2]");
+ dialects.verified_stmt("SELECT arr[:][:]");
+}
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index 37e9f8cb..5889b2bd 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -1265,37 +1265,8 @@ fn parse_lateral_flatten() {
// https://docs.snowflake.com/en/user-guide/querying-semistructured
#[test]
fn parse_semi_structured_data_traversal() {
- // most basic case
- let sql = "SELECT a:b FROM t";
- let select = snowflake().verified_only_select(sql);
- assert_eq!(
- SelectItem::UnnamedExpr(Expr::JsonAccess {
- value: Box::new(Expr::Identifier(Ident::new("a"))),
- path: JsonPath {
- path: vec![JsonPathElem::Dot {
- key: "b".to_owned(),
- quoted: false
- }]
- },
- }),
- select.projection[0]
- );
-
- // identifier can be quoted
- let sql = r#"SELECT a:"my long object key name" FROM t"#;
- let select = snowflake().verified_only_select(sql);
- assert_eq!(
- SelectItem::UnnamedExpr(Expr::JsonAccess {
- value: Box::new(Expr::Identifier(Ident::new("a"))),
- path: JsonPath {
- path: vec![JsonPathElem::Dot {
- key: "my long object key name".to_owned(),
- quoted: true
- }]
- },
- }),
- select.projection[0]
- );
+ // see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test
+ // cases. This test only has Snowflake-specific syntax like array access.
// expressions are allowed in bracket notation
let sql = r#"SELECT a[2 + 2] FROM t"#;
@@ -1316,88 +1287,6 @@ fn parse_semi_structured_data_traversal() {
select.projection[0]
);
- snowflake().verified_stmt("SELECT a:b::INT FROM t");
-
- // unquoted keywords are permitted in the object key
- let sql = "SELECT a:select, a:from FROM t";
- let select = snowflake().verified_only_select(sql);
- assert_eq!(
- vec![
- SelectItem::UnnamedExpr(Expr::JsonAccess {
- value: Box::new(Expr::Identifier(Ident::new("a"))),
- path: JsonPath {
- path: vec![JsonPathElem::Dot {
- key: "select".to_owned(),
- quoted: false
- }]
- },
- }),
- SelectItem::UnnamedExpr(Expr::JsonAccess {
- value: Box::new(Expr::Identifier(Ident::new("a"))),
- path: JsonPath {
- path: vec![JsonPathElem::Dot {
- key: "from".to_owned(),
- quoted: false
- }]
- },
- })
- ],
- select.projection
- );
-
- // multiple levels can be traversed
- // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
- let sql = r#"SELECT a:foo."bar".baz"#;
- let select = snowflake().verified_only_select(sql);
- assert_eq!(
- vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
- value: Box::new(Expr::Identifier(Ident::new("a"))),
- path: JsonPath {
- path: vec![
- JsonPathElem::Dot {
- key: "foo".to_owned(),
- quoted: false,
- },
- JsonPathElem::Dot {
- key: "bar".to_owned(),
- quoted: true,
- },
- JsonPathElem::Dot {
- key: "baz".to_owned(),
- quoted: false,
- }
- ]
- },
- })],
- select.projection
- );
-
- // dot and bracket notation can be mixed (starting with : case)
- // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
- let sql = r#"SELECT a:foo[0].bar"#;
- let select = snowflake().verified_only_select(sql);
- assert_eq!(
- vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
- value: Box::new(Expr::Identifier(Ident::new("a"))),
- path: JsonPath {
- path: vec![
- JsonPathElem::Dot {
- key: "foo".to_owned(),
- quoted: false,
- },
- JsonPathElem::Bracket {
- key: Expr::value(number("0")),
- },
- JsonPathElem::Dot {
- key: "bar".to_owned(),
- quoted: false,
- }
- ]
- },
- })],
- select.projection
- );
-
// dot and bracket notation can be mixed (starting with bracket case)
// https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
let sql = r#"SELECT a[0].foo.bar"#;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]