This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 404f7c1043 feat: initial Dremio sqlglot dialect (#33847)
404f7c1043 is described below

commit 404f7c10434ebed27a9d7c8e5636cd3a22a2eed4
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri Jun 20 12:05:45 2025 -0700

    feat: initial Dremio sqlglot dialect (#33847)
---
 superset/extensions/pylint.py                      |  4 +-
 superset/sql/dialects/__init__.py                  |  3 +-
 superset/sql/dialects/dremio.py                    | 46 ++++++++++++++++++++++
 superset/sql/parse.py                              |  4 +-
 .../unit_tests/sql/dialects/dremio_tests.py        | 16 +++++++-
 tests/unit_tests/sql/parse_tests.py                |  2 +-
 6 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/superset/extensions/pylint.py b/superset/extensions/pylint.py
index c328b1c28f..deb0c86572 100644
--- a/superset/extensions/pylint.py
+++ b/superset/extensions/pylint.py
@@ -87,7 +87,9 @@ class SQLParsingLibraryImportChecker(BaseChecker):
     def _is_disallowed(self, file_path: Path, root_mod: str) -> bool:
         # True if sqlglot is imported outside superset/sql,
         # or if any forbidden library is imported anywhere
-        in_superset_sql = file_path.match("**/superset/sql/**")
+        in_superset_sql = file_path.match("**/superset/sql/**/*.py") or file_path.match(
+            "**/superset/sql/*.py"
+        )
         return (root_mod == "sqlglot" and not in_superset_sql) or root_mod in {
             "sqlparse",
             "sqloxide",
diff --git a/superset/sql/dialects/__init__.py b/superset/sql/dialects/__init__.py
index ab09de3c2a..3b43b15bec 100644
--- a/superset/sql/dialects/__init__.py
+++ b/superset/sql/dialects/__init__.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+from .dremio import Dremio
 from .firebolt import Firebolt, FireboltOld
 
-__all__ = ["Firebolt", "FireboltOld"]
+__all__ = ["Dremio", "Firebolt", "FireboltOld"]
diff --git a/superset/sql/dialects/dremio.py b/superset/sql/dialects/dremio.py
new file mode 100644
index 0000000000..d2b8e8c3b2
--- /dev/null
+++ b/superset/sql/dialects/dremio.py
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from sqlglot import exp, generator, parser
+from sqlglot.dialects.dialect import Dialect, rename_func
+
+
+class DremioRegexpSplit(exp.Func):
+    """
+    Custom REGEXP_SPLIT function for Dremio that supports 4 arguments.
+    """
+
+    arg_types = {
+        "this": True,  # string to split
+        "expression": True,  # delimiter pattern
+        "mode": True,  # mode (like 'ALL') - required in Dremio
+        "limit": True,  # limit - required in Dremio
+    }
+
+
+class Dremio(Dialect):
+    class Parser(parser.Parser):
+        FUNCTIONS = {
+            **parser.Parser.FUNCTIONS,
+            "REGEXP_SPLIT": DremioRegexpSplit.from_arg_list,
+        }
+
+    class Generator(generator.Generator):
+        TRANSFORMS = {
+            **generator.Generator.TRANSFORMS,
+            DremioRegexpSplit: rename_func("REGEXP_SPLIT"),
+        }
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index 07b9ae5461..f2fd5474b5 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -35,7 +35,7 @@ from sqlglot.optimizer.pushdown_predicates import pushdown_predicates
 from sqlglot.optimizer.scope import Scope, ScopeType, traverse_scope
 
 from superset.exceptions import QueryClauseValidationException, SupersetParseError
-from superset.sql.dialects.firebolt import Firebolt
+from superset.sql.dialects import Dremio, Firebolt
 
 if TYPE_CHECKING:
     from superset.models.core import Database
@@ -59,7 +59,7 @@ SQLGLOT_DIALECTS = {
     "databricks": Dialects.DATABRICKS,
     # "db2": ???
     # "denodo": ???
-    # "dremio": ???
+    "dremio": Dremio,
     "drill": Dialects.DRILL,
     "druid": Dialects.DRUID,
     "duckdb": Dialects.DUCKDB,
diff --git a/superset/sql/dialects/__init__.py b/tests/unit_tests/sql/dialects/dremio_tests.py
similarity index 65%
copy from superset/sql/dialects/__init__.py
copy to tests/unit_tests/sql/dialects/dremio_tests.py
index ab09de3c2a..7d7aa9b670 100644
--- a/superset/sql/dialects/__init__.py
+++ b/tests/unit_tests/sql/dialects/dremio_tests.py
@@ -15,6 +15,18 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from .firebolt import Firebolt, FireboltOld
+from sqlglot import parse_one
 
-__all__ = ["Firebolt", "FireboltOld"]
+from superset.sql.dialects.dremio import Dremio
+
+
+def test_regexp_split() -> None:
+    """
+    Test that regexp_split works correctly in Dremio dialect.
+    """
+    sql = "SELECT REGEXP_SPLIT(tags, ',', 'ALL', 1000) as t"
+
+    ast = parse_one(sql, dialect=Dremio)
+    regenerated = ast.sql(dialect=Dremio)
+
+    assert regenerated == "SELECT REGEXP_SPLIT(tags, ',', 'ALL', 1000) AS t"
diff --git a/tests/unit_tests/sql/parse_tests.py b/tests/unit_tests/sql/parse_tests.py
index 46774759ab..5d56ab343a 100644
--- a/tests/unit_tests/sql/parse_tests.py
+++ b/tests/unit_tests/sql/parse_tests.py
@@ -1236,7 +1236,7 @@ WHERE
     """.strip()
 
     assert SQLStatement(sql, "sqlite").optimize().format() == optimized
-    assert SQLStatement(sql, "dremio").optimize().format() == not_optimized
+    assert SQLStatement(sql, "crate").optimize().format() == not_optimized
 
     # also works for scripts
     assert SQLScript(sql, "sqlite").optimize().format() == optimized

Reply via email to