This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 46f2234c GenericDialect: support colon operator for JsonAccess (#2124)
46f2234c is described below

commit 46f2234c1596d8763db3955ec50d6343ca2f77a5
Author: Samyak Sarnayak <[email protected]>
AuthorDate: Fri Jan 16 16:21:08 2026 +0530

    GenericDialect: support colon operator for JsonAccess (#2124)
---
 src/dialect/mod.rs           |  10 +++
 src/dialect/mssql.rs         |   9 +++
 src/dialect/postgresql.rs    |   3 +
 src/parser/mod.rs            |   8 ++-
 tests/sqlparser_common.rs    | 145 +++++++++++++++++++++++++++++++++++++++++++
 tests/sqlparser_snowflake.rs | 115 +---------------------------------
 6 files changed, 174 insertions(+), 116 deletions(-)

diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 873108ee..d1728566 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any {
             Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => {
                 Ok(p!(DoubleColon))
             }
+            Token::Colon => match parser.peek_nth_token(1).token {
+                // When colon is followed by a string or a number, it's usually in MAP syntax.
+                Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()),
+                // In other cases, it's used in semi-structured data traversal like in variant or JSON
+                // string columns. See `JsonAccess`.
+                _ => Ok(p!(Colon)),
+            },
             Token::Arrow
             | Token::LongArrow
             | Token::HashArrow
@@ -812,6 +819,7 @@ pub trait Dialect: Debug + Any {
             Precedence::Ampersand => 23,
             Precedence::Caret => 22,
             Precedence::Pipe => 21,
+            Precedence::Colon => 21,
             Precedence::Between => 20,
             Precedence::Eq => 20,
             Precedence::Like => 19,
@@ -1269,6 +1277,8 @@ pub enum Precedence {
     Caret,
     /// Bitwise `OR` / pipe operator (`|`).
     Pipe,
+    /// `:` operator for json/variant access.
+    Colon,
     /// `BETWEEN` operator.
     Between,
     /// Equality operator (`=`).
diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs
index faf3402c..a2854525 100644
--- a/src/dialect/mssql.rs
+++ b/src/dialect/mssql.rs
@@ -160,6 +160,15 @@ impl Dialect for MsSqlDialect {
             None
         }
     }
+
+    fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> {
+        let token = parser.peek_token();
+        match token.token {
+            // lowest prec to prevent it from turning into a binary op
+            Token::Colon => Some(Ok(self.prec_unknown())),
+            _ => None,
+        }
+    }
 }
 
 impl MsSqlDialect {
diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs
index 02bab0e0..7c9e7db8 100644
--- a/src/dialect/postgresql.rs
+++ b/src/dialect/postgresql.rs
@@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect {
             | Token::ShiftRight
             | Token::ShiftLeft
             | Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)),
+            // lowest prec to prevent it from turning into a binary op
+            Token::Colon => Some(Ok(self.prec_unknown())),
             _ => None,
         }
     }
@@ -159,6 +161,7 @@ impl Dialect for PostgreSqlDialect {
             Precedence::Ampersand => PG_OTHER_PREC,
             Precedence::Caret => CARET_PREC,
             Precedence::Pipe => PG_OTHER_PREC,
+            Precedence::Colon => PG_OTHER_PREC,
             Precedence::Between => BETWEEN_LIKE_PREC,
             Precedence::Eq => EQ_PREC,
             Precedence::Like => BETWEEN_LIKE_PREC,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 47bb1164..6fd7b5ca 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -3918,7 +3918,7 @@ impl<'a> Parser<'a> {
                 expr: Box::new(expr),
             })
         } else if Token::LBracket == *tok && self.dialect.supports_partiql()
-            || (dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == *tok)
+            || (Token::Colon == *tok)
         {
             self.prev_token();
             self.parse_json_access(expr)
@@ -3954,7 +3954,8 @@ impl<'a> Parser<'a> {
         let lower_bound = if self.consume_token(&Token::Colon) {
             None
         } else {
-            Some(self.parse_expr()?)
+            // parse expr until we hit a colon (or any token with lower precedence)
+            Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
         };
 
         // check for end
@@ -3982,7 +3983,8 @@ impl<'a> Parser<'a> {
                 stride: None,
             });
         } else {
-            Some(self.parse_expr()?)
+            // parse expr until we hit a colon (or any token with lower precedence)
+            Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?)
         };
 
         // check for end
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 95ad9a20..bbbf0d83 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -18067,3 +18067,148 @@ fn test_binary_kw_as_cast() {
     all_dialects_where(|d| d.supports_binary_kw_as_cast())
         .one_statement_parses_to("SELECT BINARY 1+1", "SELECT CAST(1 + 1 AS BINARY)");
 }
+
+#[test]
+fn parse_semi_structured_data_traversal() {
+    let dialects = TestedDialects::new(vec![
+        Box::new(GenericDialect {}),
+        Box::new(SnowflakeDialect {}),
+        Box::new(DatabricksDialect {}),
+    ]);
+
+    // most basic case
+    let sql = "SELECT a:b FROM t";
+    let select = dialects.verified_only_select(sql);
+    assert_eq!(
+        SelectItem::UnnamedExpr(Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("a"))),
+            path: JsonPath {
+                path: vec![JsonPathElem::Dot {
+                    key: "b".to_owned(),
+                    quoted: false
+                }]
+            },
+        }),
+        select.projection[0]
+    );
+
+    // identifier can be quoted
+    let sql = r#"SELECT a:"my long object key name" FROM t"#;
+    let select = dialects.verified_only_select(sql);
+    assert_eq!(
+        SelectItem::UnnamedExpr(Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("a"))),
+            path: JsonPath {
+                path: vec![JsonPathElem::Dot {
+                    key: "my long object key name".to_owned(),
+                    quoted: true
+                }]
+            },
+        }),
+        select.projection[0]
+    );
+
+    dialects.verified_stmt("SELECT a:b::INT FROM t");
+
+    // unquoted keywords are permitted in the object key
+    let sql = "SELECT a:select, a:from FROM t";
+    let select = dialects.verified_only_select(sql);
+    assert_eq!(
+        vec![
+            SelectItem::UnnamedExpr(Expr::JsonAccess {
+                value: Box::new(Expr::Identifier(Ident::new("a"))),
+                path: JsonPath {
+                    path: vec![JsonPathElem::Dot {
+                        key: "select".to_owned(),
+                        quoted: false
+                    }]
+                },
+            }),
+            SelectItem::UnnamedExpr(Expr::JsonAccess {
+                value: Box::new(Expr::Identifier(Ident::new("a"))),
+                path: JsonPath {
+                    path: vec![JsonPathElem::Dot {
+                        key: "from".to_owned(),
+                        quoted: false
+                    }]
+                },
+            })
+        ],
+        select.projection
+    );
+
+    // multiple levels can be traversed
+    // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
+    let sql = r#"SELECT a:foo."bar".baz"#;
+    let select = dialects.verified_only_select(sql);
+    assert_eq!(
+        vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("a"))),
+            path: JsonPath {
+                path: vec![
+                    JsonPathElem::Dot {
+                        key: "foo".to_owned(),
+                        quoted: false,
+                    },
+                    JsonPathElem::Dot {
+                        key: "bar".to_owned(),
+                        quoted: true,
+                    },
+                    JsonPathElem::Dot {
+                        key: "baz".to_owned(),
+                        quoted: false,
+                    }
+                ]
+            },
+        })],
+        select.projection
+    );
+
+    // dot and bracket notation can be mixed (starting with : case)
+    // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
+    let sql = r#"SELECT a:foo[0].bar"#;
+    let select = dialects.verified_only_select(sql);
+    assert_eq!(
+        vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("a"))),
+            path: JsonPath {
+                path: vec![
+                    JsonPathElem::Dot {
+                        key: "foo".to_owned(),
+                        quoted: false,
+                    },
+                    JsonPathElem::Bracket {
+                        key: Expr::value(number("0")),
+                    },
+                    JsonPathElem::Dot {
+                        key: "bar".to_owned(),
+                        quoted: false,
+                    }
+                ]
+            },
+        })],
+        select.projection
+    );
+}
+
+#[test]
+fn parse_array_subscript() {
+    let dialects = all_dialects_except(|d| {
+        d.is::<MsSqlDialect>()
+            || d.is::<SnowflakeDialect>()
+            || d.is::<SQLiteDialect>()
+            || d.is::<RedshiftSqlDialect>()
+    });
+
+    dialects.verified_stmt("SELECT arr[1]");
+    dialects.verified_stmt("SELECT arr[:]");
+    dialects.verified_stmt("SELECT arr[1:2]");
+    dialects.verified_stmt("SELECT arr[1:2:4]");
+    dialects.verified_stmt("SELECT arr[1:array_length(arr)]");
+    dialects.verified_stmt("SELECT arr[array_length(arr) - 1:array_length(arr)]");
+    dialects
+        .verified_stmt("SELECT arr[array_length(arr) - 2:array_length(arr) - 1:array_length(arr)]");
+
+    dialects.verified_stmt("SELECT arr[1][2]");
+    dialects.verified_stmt("SELECT arr[:][:]");
+}
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index 37e9f8cb..5889b2bd 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -1265,37 +1265,8 @@ fn parse_lateral_flatten() {
 // https://docs.snowflake.com/en/user-guide/querying-semistructured
 #[test]
 fn parse_semi_structured_data_traversal() {
-    // most basic case
-    let sql = "SELECT a:b FROM t";
-    let select = snowflake().verified_only_select(sql);
-    assert_eq!(
-        SelectItem::UnnamedExpr(Expr::JsonAccess {
-            value: Box::new(Expr::Identifier(Ident::new("a"))),
-            path: JsonPath {
-                path: vec![JsonPathElem::Dot {
-                    key: "b".to_owned(),
-                    quoted: false
-                }]
-            },
-        }),
-        select.projection[0]
-    );
-
-    // identifier can be quoted
-    let sql = r#"SELECT a:"my long object key name" FROM t"#;
-    let select = snowflake().verified_only_select(sql);
-    assert_eq!(
-        SelectItem::UnnamedExpr(Expr::JsonAccess {
-            value: Box::new(Expr::Identifier(Ident::new("a"))),
-            path: JsonPath {
-                path: vec![JsonPathElem::Dot {
-                    key: "my long object key name".to_owned(),
-                    quoted: true
-                }]
-            },
-        }),
-        select.projection[0]
-    );
+    // see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test
+    // cases. This test only has Snowflake-specific syntax like array access.
 
     // expressions are allowed in bracket notation
     let sql = r#"SELECT a[2 + 2] FROM t"#;
@@ -1316,88 +1287,6 @@ fn parse_semi_structured_data_traversal() {
         select.projection[0]
     );
 
-    snowflake().verified_stmt("SELECT a:b::INT FROM t");
-
-    // unquoted keywords are permitted in the object key
-    let sql = "SELECT a:select, a:from FROM t";
-    let select = snowflake().verified_only_select(sql);
-    assert_eq!(
-        vec![
-            SelectItem::UnnamedExpr(Expr::JsonAccess {
-                value: Box::new(Expr::Identifier(Ident::new("a"))),
-                path: JsonPath {
-                    path: vec![JsonPathElem::Dot {
-                        key: "select".to_owned(),
-                        quoted: false
-                    }]
-                },
-            }),
-            SelectItem::UnnamedExpr(Expr::JsonAccess {
-                value: Box::new(Expr::Identifier(Ident::new("a"))),
-                path: JsonPath {
-                    path: vec![JsonPathElem::Dot {
-                        key: "from".to_owned(),
-                        quoted: false
-                    }]
-                },
-            })
-        ],
-        select.projection
-    );
-
-    // multiple levels can be traversed
-    // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
-    let sql = r#"SELECT a:foo."bar".baz"#;
-    let select = snowflake().verified_only_select(sql);
-    assert_eq!(
-        vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
-            value: Box::new(Expr::Identifier(Ident::new("a"))),
-            path: JsonPath {
-                path: vec![
-                    JsonPathElem::Dot {
-                        key: "foo".to_owned(),
-                        quoted: false,
-                    },
-                    JsonPathElem::Dot {
-                        key: "bar".to_owned(),
-                        quoted: true,
-                    },
-                    JsonPathElem::Dot {
-                        key: "baz".to_owned(),
-                        quoted: false,
-                    }
-                ]
-            },
-        })],
-        select.projection
-    );
-
-    // dot and bracket notation can be mixed (starting with : case)
-    // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
-    let sql = r#"SELECT a:foo[0].bar"#;
-    let select = snowflake().verified_only_select(sql);
-    assert_eq!(
-        vec![SelectItem::UnnamedExpr(Expr::JsonAccess {
-            value: Box::new(Expr::Identifier(Ident::new("a"))),
-            path: JsonPath {
-                path: vec![
-                    JsonPathElem::Dot {
-                        key: "foo".to_owned(),
-                        quoted: false,
-                    },
-                    JsonPathElem::Bracket {
-                        key: Expr::value(number("0")),
-                    },
-                    JsonPathElem::Dot {
-                        key: "bar".to_owned(),
-                        quoted: false,
-                    }
-                ]
-            },
-        })],
-        select.projection
-    );
-
     // dot and bracket notation can be mixed (starting with bracket case)
     // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation
     let sql = r#"SELECT a[0].foo.bar"#;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to