This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 47b6aac7 add support for databricks JSON accessors (#2272)
47b6aac7 is described below
commit 47b6aac72208209615bbeec121e0931f80fde55f
Author: whirlun <[email protected]>
AuthorDate: Thu Mar 12 22:14:12 2026 -0700
add support for databricks JSON accessors (#2272)
---
src/ast/mod.rs | 11 +++++++++++
src/ast/spans.rs | 1 +
src/parser/mod.rs | 13 ++++++++++---
tests/sqlparser_databricks.rs | 44 +++++++++++++++++++++++++++++++++++++++++++
4 files changed, 66 insertions(+), 3 deletions(-)
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 789bf282..6659878b 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -651,6 +651,14 @@ pub enum JsonPathElem {
/// The expression used as the bracket key (string or numeric expression).
key: Expr,
},
+ /// Access an object field using colon bracket notation
+ /// e.g. `obj:['foo']`
+ ///
+ /// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html>
+ ColonBracket {
+ /// The expression used as the bracket key (string or numeric expression).
+ key: Expr,
+ },
}
/// A JSON path.
@@ -685,6 +693,9 @@ impl fmt::Display for JsonPath {
JsonPathElem::Bracket { key } => {
write!(f, "[{key}]")?;
}
+ JsonPathElem::ColonBracket { key } => {
+ write!(f, ":[{key}]")?;
+ }
}
}
Ok(())
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index 24fee30d..8dd8d8c5 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -1799,6 +1799,7 @@ impl Spanned for JsonPathElem {
match self {
JsonPathElem::Dot { .. } => Span::empty(),
JsonPathElem::Bracket { key } => key.span(),
+ JsonPathElem::ColonBracket { key } => key.span(),
}
}
}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 9530a4aa..dc47c27b 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -4195,8 +4195,9 @@ impl<'a> Parser<'a> {
match token.token {
Token::Word(Word {
value,
- // path segments in SF dot notation can be unquoted or double-quoted
- quote_style: quote_style @ (Some('"') | None),
+ // path segments in SF dot notation can be unquoted or double-quoted;
+ // Databricks also supports backtick-quoted identifiers
+ quote_style: quote_style @ (Some('"') | Some('`') | None),
// some experimentation suggests that snowflake permits
// any keyword here unquoted.
keyword: _,
@@ -4226,6 +4227,12 @@ impl<'a> Parser<'a> {
let mut path = Vec::new();
loop {
match self.next_token().token {
+ Token::Colon if path.is_empty() && self.peek_token_ref() ==
&Token::LBracket => {
+ self.next_token();
+ let key = self.parse_wildcard_expr()?;
+ self.expect_token(&Token::RBracket)?;
+ path.push(JsonPathElem::ColonBracket { key });
+ }
Token::Colon if path.is_empty() => {
path.push(self.parse_json_path_object_key()?);
}
@@ -4233,7 +4240,7 @@ impl<'a> Parser<'a> {
path.push(self.parse_json_path_object_key()?);
}
Token::LBracket => {
- let key = self.parse_expr()?;
+ let key = self.parse_wildcard_expr()?;
self.expect_token(&Token::RBracket)?;
path.push(JsonPathElem::Bracket { key });
diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs
index 24d06ef2..79b3d065 100644
--- a/tests/sqlparser_databricks.rs
+++ b/tests/sqlparser_databricks.rs
@@ -600,3 +600,47 @@ fn parse_databricks_struct_type() {
_ => unreachable!(),
}
}
+
+#[test]
+fn parse_databricks_json_accessor() {
+ // Basic colon accessor — unquoted field names are case-insensitive
+ databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM
store_data");
+
+ // Unquoted field access is case-insensitive; bracket notation is case-sensitive.
+ databricks().verified_only_select(
+ "SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive
FROM store_data",
+ );
+
+ // Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output.
+ databricks().one_statement_parses_to(
+ "SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM
store_data",
+ r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM
store_data"#,
+ );
+
+ // Dot notation
+ databricks().verified_only_select("SELECT raw:store.bicycle FROM
store_data");
+
+ // String-key bracket notation after a dot segment
+ databricks()
+ .verified_only_select("SELECT raw:store['bicycle'],
raw:store['BICYCLE'] FROM store_data");
+
+ // Integer-index bracket notation
+ databricks()
+ .verified_only_select("SELECT raw:store.fruit[0], raw:store.fruit[1]
FROM store_data");
+
+ // Wildcard [*] — including chained and mixed positions
+ databricks().verified_only_select(
+ "SELECT raw:store.basket[*], raw:store.basket[*][0] AS
first_of_baskets, \
+ raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS
all_elements_flattened, \
+ raw:store.basket[0][2].b AS subfield FROM store_data",
+ );
+
+ // Dot access following a wildcard bracket
+ databricks().verified_only_select("SELECT raw:store.book[*].isbn FROM
store_data");
+
+ // Double-colon cast — type keyword normalises to upper case
+ databricks().one_statement_parses_to(
+ "SELECT raw:store.bicycle.price::double FROM store_data",
+ "SELECT raw:store.bicycle.price::DOUBLE FROM store_data",
+ );
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]