This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 404f7c1043 feat: initial Dremio sqlglot dialect (#33847)
404f7c1043 is described below
commit 404f7c10434ebed27a9d7c8e5636cd3a22a2eed4
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri Jun 20 12:05:45 2025 -0700
feat: initial Dremio sqlglot dialect (#33847)
---
superset/extensions/pylint.py | 4 +-
superset/sql/dialects/__init__.py | 3 +-
superset/sql/dialects/dremio.py | 46 ++++++++++++++++++++++
superset/sql/parse.py | 4 +-
.../unit_tests/sql/dialects/dremio_tests.py | 16 +++++++-
tests/unit_tests/sql/parse_tests.py | 2 +-
6 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/superset/extensions/pylint.py b/superset/extensions/pylint.py
index c328b1c28f..deb0c86572 100644
--- a/superset/extensions/pylint.py
+++ b/superset/extensions/pylint.py
@@ -87,7 +87,9 @@ class SQLParsingLibraryImportChecker(BaseChecker):
def _is_disallowed(self, file_path: Path, root_mod: str) -> bool:
# True if sqlglot is imported outside superset/sql,
# or if any forbidden library is imported anywhere
- in_superset_sql = file_path.match("**/superset/sql/**")
+ in_superset_sql = file_path.match("**/superset/sql/**/*.py") or file_path.match(
+ "**/superset/sql/*.py"
+ )
return (root_mod == "sqlglot" and not in_superset_sql) or root_mod in {
"sqlparse",
"sqloxide",
diff --git a/superset/sql/dialects/__init__.py b/superset/sql/dialects/__init__.py
index ab09de3c2a..3b43b15bec 100644
--- a/superset/sql/dialects/__init__.py
+++ b/superset/sql/dialects/__init__.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+from .dremio import Dremio
from .firebolt import Firebolt, FireboltOld
-__all__ = ["Firebolt", "FireboltOld"]
+__all__ = ["Dremio", "Firebolt", "FireboltOld"]
diff --git a/superset/sql/dialects/dremio.py b/superset/sql/dialects/dremio.py
new file mode 100644
index 0000000000..d2b8e8c3b2
--- /dev/null
+++ b/superset/sql/dialects/dremio.py
@@ -0,0 +1,46 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from sqlglot import exp, generator, parser
+from sqlglot.dialects.dialect import Dialect, rename_func
+
+
+class DremioRegexpSplit(exp.Func):
+ """
+ Custom REGEXP_SPLIT function for Dremio that supports 4 arguments.
+ """
+
+ arg_types = {
+ "this": True, # string to split
+ "expression": True, # delimiter pattern
+ "mode": True, # mode (like 'ALL') - required in Dremio
+ "limit": True, # limit - required in Dremio
+ }
+
+
+class Dremio(Dialect):
+ class Parser(parser.Parser):
+ FUNCTIONS = {
+ **parser.Parser.FUNCTIONS,
+ "REGEXP_SPLIT": DremioRegexpSplit.from_arg_list,
+ }
+
+ class Generator(generator.Generator):
+ TRANSFORMS = {
+ **generator.Generator.TRANSFORMS,
+ DremioRegexpSplit: rename_func("REGEXP_SPLIT"),
+ }
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index 07b9ae5461..f2fd5474b5 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -35,7 +35,7 @@ from sqlglot.optimizer.pushdown_predicates import pushdown_predicates
from sqlglot.optimizer.scope import Scope, ScopeType, traverse_scope
from superset.exceptions import QueryClauseValidationException, SupersetParseError
-from superset.sql.dialects.firebolt import Firebolt
+from superset.sql.dialects import Dremio, Firebolt
if TYPE_CHECKING:
from superset.models.core import Database
@@ -59,7 +59,7 @@ SQLGLOT_DIALECTS = {
"databricks": Dialects.DATABRICKS,
# "db2": ???
# "denodo": ???
- # "dremio": ???
+ "dremio": Dremio,
"drill": Dialects.DRILL,
"druid": Dialects.DRUID,
"duckdb": Dialects.DUCKDB,
diff --git a/superset/sql/dialects/__init__.py b/tests/unit_tests/sql/dialects/dremio_tests.py
similarity index 65%
copy from superset/sql/dialects/__init__.py
copy to tests/unit_tests/sql/dialects/dremio_tests.py
index ab09de3c2a..7d7aa9b670 100644
--- a/superset/sql/dialects/__init__.py
+++ b/tests/unit_tests/sql/dialects/dremio_tests.py
@@ -15,6 +15,18 @@
# specific language governing permissions and limitations
# under the License.
-from .firebolt import Firebolt, FireboltOld
+from sqlglot import parse_one
-__all__ = ["Firebolt", "FireboltOld"]
+from superset.sql.dialects.dremio import Dremio
+
+
+def test_regexp_split() -> None:
+ """
+ Test that regexp_split works correctly in Dremio dialect.
+ """
+ sql = "SELECT REGEXP_SPLIT(tags, ',', 'ALL', 1000) as t"
+
+ ast = parse_one(sql, dialect=Dremio)
+ regenerated = ast.sql(dialect=Dremio)
+
+ assert regenerated == "SELECT REGEXP_SPLIT(tags, ',', 'ALL', 1000) AS t"
diff --git a/tests/unit_tests/sql/parse_tests.py b/tests/unit_tests/sql/parse_tests.py
index 46774759ab..5d56ab343a 100644
--- a/tests/unit_tests/sql/parse_tests.py
+++ b/tests/unit_tests/sql/parse_tests.py
@@ -1236,7 +1236,7 @@ WHERE
""".strip()
assert SQLStatement(sql, "sqlite").optimize().format() == optimized
- assert SQLStatement(sql, "dremio").optimize().format() == not_optimized
+ assert SQLStatement(sql, "crate").optimize().format() == not_optimized
# also works for scripts
assert SQLScript(sql, "sqlite").optimize().format() == optimized