This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new c2d7cf388d feat: Firebolt sqlglot dialect (#31825)
c2d7cf388d is described below

commit c2d7cf388d386de36e52f7a294e2783ad6539ee6
Author: Beto Dealmeida <[email protected]>
AuthorDate: Tue Jan 14 09:36:25 2025 -0500

    feat: Firebolt sqlglot dialect (#31825)
---
 pyproject.toml                      |  4 +-
 requirements/base.txt               |  7 +---
 requirements/development.txt        |  2 +-
 superset/sql/dialects/__init__.py   | 16 ++++++++
 superset/sql/dialects/firebolt.py   | 75 +++++++++++++++++++++++++++++++++++++
 superset/sql/parse.py               |  7 +++-
 tests/unit_tests/sql/parse_tests.py | 39 +++++++++++++++++--
 7 files changed, 136 insertions(+), 14 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 715e0f3db1..e643d54b52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,9 +93,7 @@ dependencies = [
     "slack_sdk>=3.19.0, <4",
     "sqlalchemy>=1.4, <2",
     "sqlalchemy-utils>=0.38.3, <0.39",
-    # known breaking changes in sqlglot 25.25.0
-    #https://github.com/tobymao/sqlglot/blob/main/CHANGELOG.md#v25250---2024-10-14
-    "sqlglot>=25.24.0,<25.25.0",
+    "sqlglot>=26.1.3, <27",
     "sqlparse>=0.5.0",
     "tabulate>=0.8.9, <0.9",
     "typing-extensions>=4, <5",
diff --git a/requirements/base.txt b/requirements/base.txt
index 5540942598..7a30a505cb 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -9,9 +9,7 @@ apispec==6.3.0
 apsw==3.46.0.0
     # via shillelagh
 async-timeout==4.0.3
-    # via
-    #   -r requirements/base.in
-    #   redis
+    # via -r requirements/base.in
 attrs==24.2.0
     # via
     #   cattrs
@@ -370,7 +368,7 @@ sqlalchemy-utils==0.38.3
     # via
     #   apache-superset (pyproject.toml)
     #   flask-appbuilder
-sqlglot==25.24.5
+sqlglot==26.1.3
     # via apache-superset (pyproject.toml)
 sqlparse==0.5.2
     # via apache-superset (pyproject.toml)
@@ -388,7 +386,6 @@ typing-extensions==4.12.2
     # via
     #   apache-superset (pyproject.toml)
     #   alembic
-    #   cattrs
     #   flask-limiter
     #   kombu
     #   limits
diff --git a/requirements/development.txt b/requirements/development.txt
index 2343428738..a8082d9074 100644
--- a/requirements/development.txt
+++ b/requirements/development.txt
@@ -800,7 +800,7 @@ sqlalchemy-utils==0.38.3
     #   -c requirements/base.txt
     #   apache-superset
     #   flask-appbuilder
-sqlglot==25.24.5
+sqlglot==26.1.3
     # via
     #   -c requirements/base.txt
     #   apache-superset
diff --git a/superset/sql/dialects/__init__.py b/superset/sql/dialects/__init__.py
new file mode 100644
index 0000000000..13a83393a9
--- /dev/null
+++ b/superset/sql/dialects/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/superset/sql/dialects/firebolt.py b/superset/sql/dialects/firebolt.py
new file mode 100644
index 0000000000..119ee3ba19
--- /dev/null
+++ b/superset/sql/dialects/firebolt.py
@@ -0,0 +1,75 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+from sqlglot import exp, generator, parser
+from sqlglot.dialects.dialect import Dialect
+from sqlglot.tokens import TokenType
+
+
+class Firebolt(Dialect):
+    """
+    A sqlglot dialect for Firebolt.
+    """
+
+    class Parser(parser.Parser):
+        """
+        Custom parser for Firebolt.
+
+        In Firebolt `NOT` has higher precedence than `IN`, so we need to wrap the
+        expression in parentheses when we find a negated range.
+        """
+
+        UNARY_PARSERS = {
+            **parser.Parser.UNARY_PARSERS,
+            TokenType.NOT: lambda self: self.expression(
+                exp.Not,
+                this=self._parse_unary(),
+            ),
+        }
+
+        def _negate_range(
+            self,
+            this: exp.Expression | None = None,
+        ) -> exp.Expression | None:
+            if not this:
+                return this
+
+            return self.expression(exp.Not, this=self.expression(exp.Paren, this=this))
+
+    class Generator(generator.Generator):
+        """
+        Custom generator for Firebolt.
+        """
+
+        TYPE_MAPPING = {
+            **generator.Generator.TYPE_MAPPING,
+            exp.DataType.Type.VARBINARY: "BYTEA",
+        }
+
+        def not_sql(self, expression: exp.Not) -> str:
+            """
+            Parenthesize negated expressions.
+
+            Firebolt requires negated to be wrapped in parentheses, since NOT has higher
+            precedence than IN.
+            """
+            if isinstance(expression.this, exp.In):
+                return f"NOT ({self.sql(expression, 'this')})"
+
+            return super().not_sql(expression)
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index 34ec9299d3..3fd13a8006 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -36,9 +36,12 @@ from sqlglot.optimizer.pushdown_predicates import pushdown_predicates
 from sqlglot.optimizer.scope import Scope, ScopeType, traverse_scope
 
 from superset.exceptions import SupersetParseError
+from superset.sql.dialects.firebolt import Firebolt
 
 logger = logging.getLogger(__name__)
 
+# register 3rd party dialects
+Dialect.classes["firebolt"] = Firebolt
 
 # mapping between DB engine specs and sqlglot dialects
 SQLGLOT_DIALECTS = {
@@ -62,7 +65,7 @@ SQLGLOT_DIALECTS = {
     # "elasticsearch": ???
     # "exa": ???
     # "firebird": ???
-    # "firebolt": ???
+    "firebolt": "firebolt",
     "gsheets": Dialects.SQLITE,
     "hana": Dialects.POSTGRES,
     "hive": Dialects.HIVE,
@@ -81,7 +84,7 @@ SQLGLOT_DIALECTS = {
     "presto": Dialects.PRESTO,
     "pydoris": Dialects.DORIS,
     "redshift": Dialects.REDSHIFT,
-    # "risingwave": ???
+    "risingwave": Dialects.RISINGWAVE,
     # "rockset": ???
     "shillelagh": Dialects.SQLITE,
     "snowflake": Dialects.SNOWFLAKE,
diff --git a/tests/unit_tests/sql/parse_tests.py b/tests/unit_tests/sql/parse_tests.py
index 1eabb78e05..a2aff686a7 100644
--- a/tests/unit_tests/sql/parse_tests.py
+++ b/tests/unit_tests/sql/parse_tests.py
@@ -301,7 +301,7 @@ def test_format_no_dialect() -> None:
     Test format with an engine that has no corresponding dialect.
     """
     assert (
-        SQLScript("SELECT col FROM t WHERE col NOT IN (1, 2)", "firebolt").format()
+        SQLScript("SELECT col FROM t WHERE col NOT IN (1, 2)", "dremio").format()
         == "SELECT col\nFROM t\nWHERE col NOT IN (1,\n                  2)"
     )
 
@@ -311,7 +311,7 @@ def test_split_no_dialect() -> None:
     Test the statement split when the engine has no corresponding dialect.
     """
     sql = "SELECT col FROM t WHERE col NOT IN (1, 2); SELECT * FROM t; SELECT foo"
-    statements = SQLScript(sql, "firebolt").statements
+    statements = SQLScript(sql, "dremio").statements
     assert len(statements) == 3
     assert statements[0]._sql == "SELECT col FROM t WHERE col NOT IN (1, 2)"
     assert statements[1]._sql == "SELECT * FROM t"
@@ -1112,4 +1112,37 @@ WHERE anon_1.a > 1
   AND anon_1.b = 2"""
 
     assert SQLStatement(sql, "sqlite").optimize().format() == optimized
-    assert SQLStatement(sql, "firebolt").optimize().format() == not_optimized
+    assert SQLStatement(sql, "dremio").optimize().format() == not_optimized
+
+
+def test_firebolt() -> None:
+    """
+    Test that Firebolt 3rd party dialect is registered correctly.
+
+    We need a custom dialect for Firebolt because it parses `NOT col IN (1, 2)` as
+    `(NOT col) IN (1, 2)` instead of `NOT (col IN (1, 2))`, which will fail when `col`
+    is not a boolean.
+
+    Note that `NOT col = 1` works as expected in Firebolt, parsing as `NOT (col = 1)`.
+    """
+    sql = "SELECT col NOT IN (1, 2) FROM tbl"
+    assert (
+        SQLStatement(sql, "firebolt").format()
+        == """
+SELECT
+  NOT (
+    col IN (1, 2)
+  )
+FROM tbl
+    """.strip()
+    )
+
+    sql = "SELECT NOT col = 1 FROM tbl"
+    assert (
+        SQLStatement(sql, "firebolt").format()
+        == """
+SELECT
+  NOT col = 1
+FROM tbl
+    """.strip()
+    )

Reply via email to