This is an automated email from the ASF dual-hosted git repository. github-bot pushed a commit to branch gh-readonly-queue/main/pr-2272-d38dd78122236a2186ebfa0e252b6b4dcfc3537c in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git
commit 47b6aac72208209615bbeec121e0931f80fde55f Author: whirlun <[email protected]> AuthorDate: Thu Mar 12 22:14:12 2026 -0700 add support for databricks JSON accessors (#2272) --- src/ast/mod.rs | 11 +++++++++++ src/ast/spans.rs | 1 + src/parser/mod.rs | 13 ++++++++++--- tests/sqlparser_databricks.rs | 44 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 789bf282..6659878b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -651,6 +651,14 @@ pub enum JsonPathElem { /// The expression used as the bracket key (string or numeric expression). key: Expr, }, + /// Access an object field using colon bracket notation + /// e.g. `obj:['foo']` + /// + /// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html> + ColonBracket { + /// The expression used as the bracket key (string or numeric expression). + key: Expr, + }, } /// A JSON path. @@ -685,6 +693,9 @@ impl fmt::Display for JsonPath { JsonPathElem::Bracket { key } => { write!(f, "[{key}]")?; } + JsonPathElem::ColonBracket { key } => { + write!(f, ":[{key}]")?; + } } } Ok(()) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 24fee30d..8dd8d8c5 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1799,6 +1799,7 @@ impl Spanned for JsonPathElem { match self { JsonPathElem::Dot { .. } => Span::empty(), JsonPathElem::Bracket { key } => key.span(), + JsonPathElem::ColonBracket { key } => key.span(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9530a4aa..dc47c27b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4195,8 +4195,9 @@ impl<'a> Parser<'a> { match token.token { Token::Word(Word { value, - // path segments in SF dot notation can be unquoted or double-quoted - quote_style: quote_style @ (Some('"') | None), + // path segments in SF dot notation can be unquoted or double-quoted; + // Databricks also supports backtick-quoted identifiers + quote_style: quote_style @ (Some('"') | Some('`') | None), // some experimentation suggests that snowflake permits // any keyword here unquoted. keyword: _, @@ -4226,6 +4227,12 @@ impl<'a> Parser<'a> { let mut path = Vec::new(); loop { match self.next_token().token { + Token::Colon if path.is_empty() && self.peek_token_ref() == &Token::LBracket => { + self.next_token(); + let key = self.parse_wildcard_expr()?; + self.expect_token(&Token::RBracket)?; + path.push(JsonPathElem::ColonBracket { key }); + } Token::Colon if path.is_empty() => { path.push(self.parse_json_path_object_key()?); } @@ -4233,7 +4240,7 @@ impl<'a> Parser<'a> { path.push(self.parse_json_path_object_key()?); } Token::LBracket => { - let key = self.parse_expr()?; + let key = self.parse_wildcard_expr()?; self.expect_token(&Token::RBracket)?; path.push(JsonPathElem::Bracket { key }); diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 24d06ef2..79b3d065 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -600,3 +600,47 @@ fn parse_databricks_struct_type() { _ => unreachable!(), } } + +#[test] +fn parse_databricks_json_accessor() { + // Basic colon accessor — unquoted field names are case-insensitive + databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM store_data"); + + // Unquoted field access is case-insensitive; bracket notation is case-sensitive. + databricks().verified_only_select( + "SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive FROM store_data", + ); + + // Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output. + databricks().one_statement_parses_to( + "SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM store_data", + r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM store_data"#, + ); + + // Dot notation + databricks().verified_only_select("SELECT raw:store.bicycle FROM store_data"); + + // String-key bracket notation after a dot segment + databricks() + .verified_only_select("SELECT raw:store['bicycle'], raw:store['BICYCLE'] FROM store_data"); + + // Integer-index bracket notation + databricks() + .verified_only_select("SELECT raw:store.fruit[0], raw:store.fruit[1] FROM store_data"); + + // Wildcard [*] — including chained and mixed positions + databricks().verified_only_select( + "SELECT raw:store.basket[*], raw:store.basket[*][0] AS first_of_baskets, \ + raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS all_elements_flattened, \ + raw:store.basket[0][2].b AS subfield FROM store_data", + ); + + // Dot access following a wildcard bracket + databricks().verified_only_select("SELECT raw:store.book[*].isbn FROM store_data"); + + // Double-colon cast — type keyword normalises to upper case + databricks().one_statement_parses_to( + "SELECT raw:store.bicycle.price::double FROM store_data", + "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", + ); +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
