This is an automated email from the ASF dual-hosted git repository.
Vitor-Avila pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 4311a15eb2b feat(sqlglot): Vertica dialect (#39969)
4311a15eb2b is described below
commit 4311a15eb2bee9bc842396904e0bc3792c853449
Author: Beto Dealmeida <[email protected]>
AuthorDate: Fri May 8 13:34:34 2026 -0400
feat(sqlglot): Vertica dialect (#39969)
---
superset/sql/dialects/__init__.py | 11 +-
superset/sql/dialects/vertica.py | 87 +++++++++++
superset/sql/parse.py | 4 +-
tests/unit_tests/sql/dialects/vertica_tests.py | 197 +++++++++++++++++++++++++
4 files changed, 296 insertions(+), 3 deletions(-)
diff --git a/superset/sql/dialects/__init__.py
b/superset/sql/dialects/__init__.py
index 0334efb5f11..f7b82538115 100644
--- a/superset/sql/dialects/__init__.py
+++ b/superset/sql/dialects/__init__.py
@@ -20,5 +20,14 @@ from .dremio import Dremio
from .firebolt import Firebolt, FireboltOld
from .opensearch import OpenSearch
from .pinot import Pinot
+from .vertica import Vertica
-__all__ = ["DB2", "Dremio", "Firebolt", "FireboltOld", "OpenSearch", "Pinot"]
+__all__ = [
+ "DB2",
+ "Dremio",
+ "Firebolt",
+ "FireboltOld",
+ "OpenSearch",
+ "Pinot",
+ "Vertica",
+]
diff --git a/superset/sql/dialects/vertica.py b/superset/sql/dialects/vertica.py
new file mode 100644
index 00000000000..e26886f6ee8
--- /dev/null
+++ b/superset/sql/dialects/vertica.py
@@ -0,0 +1,87 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Vertica dialect.
+
+Vertica is wire-compatible with PostgreSQL but provides additional analytical
+functions natively. This dialect extends the Postgres dialect to preserve
+Vertica-native functions that the Postgres generator would otherwise rewrite.
+"""
+
+from __future__ import annotations
+
+from sqlglot import exp
+from sqlglot.dialects.dialect import date_delta_sql
+from sqlglot.dialects.postgres import Postgres
+from sqlglot.helper import seq_get
+
+
+def _build_datediff(args: list[exp.Expression]) -> exp.DateDiff:
+ # Vertica's signature is DATEDIFF(unit, start, end); the default sqlglot
+ # parser assumes (end, start, unit), so we remap the positional args.
+ return exp.DateDiff(
+ this=seq_get(args, 2),
+ expression=seq_get(args, 1),
+ unit=exp.var(seq_get(args, 0).name) if seq_get(args, 0) else None,
+ )
+
+
+class Vertica(Postgres):
+ """
+ Vertica dialect.
+
+ Extends PostgreSQL by keeping functions that Vertica supports natively but
+ Postgres does not (e.g. ``LAST_DAY``, ``DATEDIFF``, ``MEDIAN``, ``NVL2``).
+ """
+
+ class Parser(Postgres.Parser):
+ FUNCTIONS = {
+ **Postgres.Parser.FUNCTIONS,
+ "DATEDIFF": _build_datediff,
+ "TIMESTAMPDIFF": _build_datediff,
+ }
+
+ class Generator(Postgres.Generator):
+ # Vertica's LAST_DAY only accepts a date/timestamp; it does not take a
+ # date part argument like Snowflake's variant.
+ LAST_DAY_SUPPORTS_DATE_PART = False
+
+ # Vertica supports MEDIAN and NVL2 natively; Postgres does not, and the
+ # inherited generator rewrites them into PERCENTILE_CONT and CASE
+ # expressions respectively.
+ SUPPORTS_MEDIAN = True
+ NVL2_SUPPORTED = True
+
+ # Emit INTERVAL '<value>' <unit> (SQL-standard) instead of the
+ # Postgres-style INTERVAL '<value> <unit>'. Vertica miscomputes the
+ # combined-string form for MONTH/YEAR units (treats them as a fixed
+ # number of days). See https://forum.vertica.com/discussion/229329/.
+ SINGLE_STRING_INTERVAL = False
+
+ TRANSFORMS = {
+ **Postgres.Generator.TRANSFORMS,
+ # Postgres rewrites LAST_DAY into DATE_TRUNC + INTERVAL arithmetic
+ # because it lacks the function. Vertica supports it natively, so
+ # drop the rewrite and fall back to the base lastday_sql.
+ exp.LastDay: lambda self, e: self.function_fallback_sql(e),
+ # Postgres rewrites DATEDIFF into EXTRACT(epoch ...) / AGE() math.
+ # Vertica's native form is DATEDIFF(unit, start, end), matching
+ # Snowflake's signature.
+ exp.DateDiff: date_delta_sql("DATEDIFF"),
+ exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
+ }
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index bb3ef5e1c4b..aab3a61c50e 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -45,7 +45,7 @@ from sqlglot.optimizer.scope import (
)
from superset.exceptions import QueryClauseValidationException,
SupersetParseError
-from superset.sql.dialects import DB2, Dremio, Firebolt, OpenSearch, Pinot
+from superset.sql.dialects import DB2, Dremio, Firebolt, OpenSearch, Pinot,
Vertica
if TYPE_CHECKING:
from superset.models.core import Database
@@ -113,7 +113,7 @@ SQLGLOT_DIALECTS = {
# "taosws": ???
"teradatasql": Dialects.TERADATA,
"trino": Dialects.TRINO,
- "vertica": Dialects.POSTGRES,
+ "vertica": Vertica,
"yql": Dialects.CLICKHOUSE,
}
diff --git a/tests/unit_tests/sql/dialects/vertica_tests.py
b/tests/unit_tests/sql/dialects/vertica_tests.py
new file mode 100644
index 00000000000..bf39cf76cde
--- /dev/null
+++ b/tests/unit_tests/sql/dialects/vertica_tests.py
@@ -0,0 +1,197 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from sqlglot import parse_one
+
+from superset.sql.dialects.vertica import Vertica
+
+
+def test_last_day_round_trips_natively() -> None:
+ """
+ Vertica supports LAST_DAY natively, unlike Postgres which rewrites it into
+ DATE_TRUNC + INTERVAL arithmetic.
+ """
+ sql = "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+ ast = parse_one(sql, dialect=Vertica)
+ regenerated = ast.sql(dialect=Vertica)
+
+ assert regenerated == "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+
+def test_last_day_not_rewritten_like_postgres() -> None:
+ """
+ Postgres rewrites LAST_DAY because it lacks the function. Verify Vertica
+ does not inherit that rewrite.
+ """
+ sql = "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+ postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+ vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+ assert "DATE_TRUNC" in postgres_sql
+ assert "DATE_TRUNC" not in vertica_sql
+ assert "LAST_DAY" in vertica_sql
+
+
+def test_translate_from_postgres_to_vertica() -> None:
+ """
+ A LAST_DAY expression parsed from Postgres should generate the native
+ Vertica form, not the Postgres rewrite.
+ """
+ sql = "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+ ast = parse_one(sql, dialect="postgres")
+ regenerated = ast.sql(dialect=Vertica)
+
+ assert regenerated == "SELECT LAST_DAY(DATE('2026-01-15'))"
+
+
+def test_postgres_features_still_work() -> None:
+ """
+ Vertica inherits from Postgres, so unrelated Postgres syntax should still
+ parse and regenerate correctly.
+ """
+ sql = "SELECT a::INT, b || c FROM t WHERE d ~ '^foo'"
+
+ ast = parse_one(sql, dialect=Vertica)
+ regenerated = ast.sql(dialect=Vertica)
+
+ assert regenerated == "SELECT CAST(a AS INT), b || c FROM t WHERE d ~
'^foo'"
+
+
+def test_datediff_round_trips_natively() -> None:
+ """
+ Vertica's DATEDIFF is ``DATEDIFF(unit, start, end)``. Postgres rewrites it
+ into ``EXTRACT(epoch ...) / N`` arithmetic.
+ """
+ sql = "SELECT DATEDIFF('day', a, b) FROM t"
+
+ ast = parse_one(sql, dialect=Vertica)
+ regenerated = ast.sql(dialect=Vertica)
+
+ assert regenerated == "SELECT DATEDIFF(DAY, a, b) FROM t"
+
+
+def test_datediff_month_round_trips_natively() -> None:
+ """
+ The MONTH unit exercises Postgres's ``AGE()`` rewrite branch, which Vertica
+ does not need.
+ """
+ sql = "SELECT DATEDIFF('month', a, b) FROM t"
+
+ ast = parse_one(sql, dialect=Vertica)
+ regenerated = ast.sql(dialect=Vertica)
+
+ assert regenerated == "SELECT DATEDIFF(MONTH, a, b) FROM t"
+
+
+def test_datediff_not_rewritten_like_postgres() -> None:
+ """
+ Postgres rewrites DATEDIFF; Vertica should keep the native call.
+ """
+ sql = "SELECT DATEDIFF('day', a, b) FROM t"
+
+ postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+ vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+ assert "DATEDIFF" not in postgres_sql
+ assert "DATEDIFF(DAY" in vertica_sql
+
+
+def test_median_round_trips_natively() -> None:
+ """
+ Vertica supports MEDIAN as a native analytic/aggregate function. Postgres
+ rewrites it into PERCENTILE_CONT + WITHIN GROUP.
+ """
+ sql = "SELECT MEDIAN(x) FROM t"
+
+ ast = parse_one(sql, dialect=Vertica)
+ regenerated = ast.sql(dialect=Vertica)
+
+ assert regenerated == "SELECT MEDIAN(x) FROM t"
+
+
+def test_median_not_rewritten_like_postgres() -> None:
+ sql = "SELECT MEDIAN(x) FROM t"
+
+ postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+ vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+ assert "PERCENTILE_CONT" in postgres_sql
+ assert "PERCENTILE_CONT" not in vertica_sql
+ assert "MEDIAN" in vertica_sql
+
+
+def test_nvl2_round_trips_natively() -> None:
+ """
+ Vertica supports NVL2 natively; Postgres rewrites it into a CASE.
+ """
+ sql = "SELECT NVL2(a, b, c) FROM t"
+
+ ast = parse_one(sql, dialect=Vertica)
+ regenerated = ast.sql(dialect=Vertica)
+
+ assert regenerated == "SELECT NVL2(a, b, c) FROM t"
+
+
+def test_nvl2_not_rewritten_like_postgres() -> None:
+ sql = "SELECT NVL2(a, b, c) FROM t"
+
+ postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+ vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+ assert "CASE" in postgres_sql
+ assert "CASE" not in vertica_sql
+ assert "NVL2" in vertica_sql
+
+
+def test_interval_uses_sql_standard_form() -> None:
+ """
+ Vertica miscomputes month/year arithmetic when given the Postgres-style
+ single-string interval (``INTERVAL '2 MONTH'``). The SQL-standard form
+ ``INTERVAL '2' MONTH`` is correct on all Vertica versions.
+ See https://forum.vertica.com/discussion/229329/.
+ """
+ sql = "SELECT date_col + INTERVAL '2 MONTH' FROM t"
+
+ ast = parse_one(sql, dialect=Vertica)
+ regenerated = ast.sql(dialect=Vertica)
+
+ assert regenerated == "SELECT date_col + INTERVAL '2' MONTH FROM t"
+
+
+def test_interval_year_uses_sql_standard_form() -> None:
+ sql = "SELECT date_col + INTERVAL '1 YEAR' FROM t"
+
+ ast = parse_one(sql, dialect=Vertica)
+ regenerated = ast.sql(dialect=Vertica)
+
+ assert regenerated == "SELECT date_col + INTERVAL '1' YEAR FROM t"
+
+
+def test_interval_diverges_from_postgres() -> None:
+ """
+ Postgres emits the combined-string form; Vertica should not.
+ """
+ sql = "SELECT date_col + INTERVAL '2 MONTH' FROM t"
+
+ postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
+ vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
+
+ assert "INTERVAL '2 MONTH'" in postgres_sql
+ assert "INTERVAL '2' MONTH" in vertica_sql