This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new c2d7cf388d feat: Firebolt sqlglot dialect (#31825)
c2d7cf388d is described below
commit c2d7cf388d386de36e52f7a294e2783ad6539ee6
Author: Beto Dealmeida <[email protected]>
AuthorDate: Tue Jan 14 09:36:25 2025 -0500
feat: Firebolt sqlglot dialect (#31825)
---
pyproject.toml | 4 +-
requirements/base.txt | 7 +---
requirements/development.txt | 2 +-
superset/sql/dialects/__init__.py | 16 ++++++++
superset/sql/dialects/firebolt.py | 75 +++++++++++++++++++++++++++++++++++++
superset/sql/parse.py | 7 +++-
tests/unit_tests/sql/parse_tests.py | 39 +++++++++++++++++--
7 files changed, 136 insertions(+), 14 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 715e0f3db1..e643d54b52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,9 +93,7 @@ dependencies = [
"slack_sdk>=3.19.0, <4",
"sqlalchemy>=1.4, <2",
"sqlalchemy-utils>=0.38.3, <0.39",
- # known breaking changes in sqlglot 25.25.0
-
#https://github.com/tobymao/sqlglot/blob/main/CHANGELOG.md#v25250---2024-10-14
- "sqlglot>=25.24.0,<25.25.0",
+ "sqlglot>=26.1.3, <27",
"sqlparse>=0.5.0",
"tabulate>=0.8.9, <0.9",
"typing-extensions>=4, <5",
diff --git a/requirements/base.txt b/requirements/base.txt
index 5540942598..7a30a505cb 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -9,9 +9,7 @@ apispec==6.3.0
apsw==3.46.0.0
# via shillelagh
async-timeout==4.0.3
- # via
- # -r requirements/base.in
- # redis
+ # via -r requirements/base.in
attrs==24.2.0
# via
# cattrs
@@ -370,7 +368,7 @@ sqlalchemy-utils==0.38.3
# via
# apache-superset (pyproject.toml)
# flask-appbuilder
-sqlglot==25.24.5
+sqlglot==26.1.3
# via apache-superset (pyproject.toml)
sqlparse==0.5.2
# via apache-superset (pyproject.toml)
@@ -388,7 +386,6 @@ typing-extensions==4.12.2
# via
# apache-superset (pyproject.toml)
# alembic
- # cattrs
# flask-limiter
# kombu
# limits
diff --git a/requirements/development.txt b/requirements/development.txt
index 2343428738..a8082d9074 100644
--- a/requirements/development.txt
+++ b/requirements/development.txt
@@ -800,7 +800,7 @@ sqlalchemy-utils==0.38.3
# -c requirements/base.txt
# apache-superset
# flask-appbuilder
-sqlglot==25.24.5
+sqlglot==26.1.3
# via
# -c requirements/base.txt
# apache-superset
diff --git a/superset/sql/dialects/__init__.py
b/superset/sql/dialects/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/superset/sql/dialects/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/superset/sql/dialects/firebolt.py
b/superset/sql/dialects/firebolt.py
new file mode 100644
index 0000000000..119ee3ba19
--- /dev/null
+++ b/superset/sql/dialects/firebolt.py
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from sqlglot import exp, generator, parser
+from sqlglot.dialects.dialect import Dialect
+from sqlglot.tokens import TokenType
+
+
+class Firebolt(Dialect):
+ """
+ A sqlglot dialect for Firebolt.
+ """
+
+ class Parser(parser.Parser):
+ """
+ Custom parser for Firebolt.
+
+ In Firebolt `NOT` has higher precedence than `IN`, so we need to wrap
the
+ expression in parentheses when we find a negated range.
+ """
+
+ UNARY_PARSERS = {
+ **parser.Parser.UNARY_PARSERS,
+ TokenType.NOT: lambda self: self.expression(
+ exp.Not,
+ this=self._parse_unary(),
+ ),
+ }
+
+ def _negate_range(
+ self,
+ this: exp.Expression | None = None,
+ ) -> exp.Expression | None:
+ if not this:
+ return this
+
+ return self.expression(exp.Not, this=self.expression(exp.Paren,
this=this))
+
+ class Generator(generator.Generator):
+ """
+ Custom generator for Firebolt.
+ """
+
+ TYPE_MAPPING = {
+ **generator.Generator.TYPE_MAPPING,
+ exp.DataType.Type.VARBINARY: "BYTEA",
+ }
+
+ def not_sql(self, expression: exp.Not) -> str:
+ """
+ Parenthesize negated expressions.
+
+ Firebolt requires negated to be wrapped in parentheses, since NOT
has higher
+ precedence than IN.
+ """
+ if isinstance(expression.this, exp.In):
+ return f"NOT ({self.sql(expression, 'this')})"
+
+ return super().not_sql(expression)
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index 34ec9299d3..3fd13a8006 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -36,9 +36,12 @@ from sqlglot.optimizer.pushdown_predicates import
pushdown_predicates
from sqlglot.optimizer.scope import Scope, ScopeType, traverse_scope
from superset.exceptions import SupersetParseError
+from superset.sql.dialects.firebolt import Firebolt
logger = logging.getLogger(__name__)
+# register 3rd party dialects
+Dialect.classes["firebolt"] = Firebolt
# mapping between DB engine specs and sqlglot dialects
SQLGLOT_DIALECTS = {
@@ -62,7 +65,7 @@ SQLGLOT_DIALECTS = {
# "elasticsearch": ???
# "exa": ???
# "firebird": ???
- # "firebolt": ???
+ "firebolt": "firebolt",
"gsheets": Dialects.SQLITE,
"hana": Dialects.POSTGRES,
"hive": Dialects.HIVE,
@@ -81,7 +84,7 @@ SQLGLOT_DIALECTS = {
"presto": Dialects.PRESTO,
"pydoris": Dialects.DORIS,
"redshift": Dialects.REDSHIFT,
- # "risingwave": ???
+ "risingwave": Dialects.RISINGWAVE,
# "rockset": ???
"shillelagh": Dialects.SQLITE,
"snowflake": Dialects.SNOWFLAKE,
diff --git a/tests/unit_tests/sql/parse_tests.py
b/tests/unit_tests/sql/parse_tests.py
index 1eabb78e05..a2aff686a7 100644
--- a/tests/unit_tests/sql/parse_tests.py
+++ b/tests/unit_tests/sql/parse_tests.py
@@ -301,7 +301,7 @@ def test_format_no_dialect() -> None:
Test format with an engine that has no corresponding dialect.
"""
assert (
- SQLScript("SELECT col FROM t WHERE col NOT IN (1, 2)",
"firebolt").format()
+ SQLScript("SELECT col FROM t WHERE col NOT IN (1, 2)",
"dremio").format()
== "SELECT col\nFROM t\nWHERE col NOT IN (1,\n 2)"
)
@@ -311,7 +311,7 @@ def test_split_no_dialect() -> None:
Test the statement split when the engine has no corresponding dialect.
"""
sql = "SELECT col FROM t WHERE col NOT IN (1, 2); SELECT * FROM t; SELECT
foo"
- statements = SQLScript(sql, "firebolt").statements
+ statements = SQLScript(sql, "dremio").statements
assert len(statements) == 3
assert statements[0]._sql == "SELECT col FROM t WHERE col NOT IN (1, 2)"
assert statements[1]._sql == "SELECT * FROM t"
@@ -1112,4 +1112,37 @@ WHERE anon_1.a > 1
AND anon_1.b = 2"""
assert SQLStatement(sql, "sqlite").optimize().format() == optimized
- assert SQLStatement(sql, "firebolt").optimize().format() == not_optimized
+ assert SQLStatement(sql, "dremio").optimize().format() == not_optimized
+
+
+def test_firebolt() -> None:
+ """
+ Test that Firebolt 3rd party dialect is registered correctly.
+
+ We need a custom dialect for Firebolt because it parses `NOT col IN (1,
2)` as
+ `(NOT col) IN (1, 2)` instead of `NOT (col IN (1, 2))`, which will fail
when `col`
+ is not a boolean.
+
+ Note that `NOT col = 1` works as expected in Firebolt, parsing as `NOT
(col = 1)`.
+ """
+ sql = "SELECT col NOT IN (1, 2) FROM tbl"
+ assert (
+ SQLStatement(sql, "firebolt").format()
+ == """
+SELECT
+ NOT (
+ col IN (1, 2)
+ )
+FROM tbl
+ """.strip()
+ )
+
+ sql = "SELECT NOT col = 1 FROM tbl"
+ assert (
+ SQLStatement(sql, "firebolt").format()
+ == """
+SELECT
+ NOT col = 1
+FROM tbl
+ """.strip()
+ )