This is an automated email from the ASF dual-hosted git repository.
beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new e4cb84bc02 feat: DB2 dialect for sqlglot (#36365)
e4cb84bc02 is described below
commit e4cb84bc02489d756f53b58ad17fa23dbd701028
Author: Beto Dealmeida <[email protected]>
AuthorDate: Tue Dec 2 12:19:52 2025 -0500
feat: DB2 dialect for sqlglot (#36365)
---
superset/sql/dialects/__init__.py | 3 +-
superset/sql/dialects/db2.py | 148 +++++++++++++++++
superset/sql/parse.py | 4 +-
tests/unit_tests/sql/dialects/db2_tests.py | 246 +++++++++++++++++++++++++++++
4 files changed, 398 insertions(+), 3 deletions(-)
diff --git a/superset/sql/dialects/__init__.py
b/superset/sql/dialects/__init__.py
index f4d56e17e6..71c8958a80 100644
--- a/superset/sql/dialects/__init__.py
+++ b/superset/sql/dialects/__init__.py
@@ -15,8 +15,9 @@
# specific language governing permissions and limitations
# under the License.
+from .db2 import DB2
from .dremio import Dremio
from .firebolt import Firebolt, FireboltOld
from .pinot import Pinot
-__all__ = ["Dremio", "Firebolt", "FireboltOld", "Pinot"]
+__all__ = ["DB2", "Dremio", "Firebolt", "FireboltOld", "Pinot"]
diff --git a/superset/sql/dialects/db2.py b/superset/sql/dialects/db2.py
new file mode 100644
index 0000000000..4f70543be3
--- /dev/null
+++ b/superset/sql/dialects/db2.py
@@ -0,0 +1,148 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+IBM DB2 dialect.
+
+DB2 uses labeled durations for date arithmetic (e.g., expr + 1 DAYS).
+This syntax is non-standard and requires custom parser support.
+"""
+
+from __future__ import annotations
+
+from sqlglot import exp, tokens
+from sqlglot.dialects.postgres import Postgres
+
+
+class DB2Interval(exp.Expression):
+ """DB2 labeled duration expression (e.g., '1 DAYS', '2 MONTHS')."""
+
+ arg_types = {"this": True, "unit": True}
+
+
+class DB2(Postgres):
+ """
+ IBM DB2 dialect.
+
+ Extends PostgreSQL with support for labeled durations in date arithmetic.
+ """
+
+ class Tokenizer(Postgres.Tokenizer):
+ """DB2 SQL tokenizer with support for DB2-specific keywords."""
+
+ KEYWORDS = {
+ **Postgres.Tokenizer.KEYWORDS,
+ # Time units; can follow numbers in date arithmetic
+ "MICROSECOND": tokens.TokenType.VAR,
+ "MICROSECONDS": tokens.TokenType.VAR,
+ "SECOND": tokens.TokenType.VAR,
+ "SECONDS": tokens.TokenType.VAR,
+ "MINUTE": tokens.TokenType.VAR,
+ "MINUTES": tokens.TokenType.VAR,
+ "HOUR": tokens.TokenType.VAR,
+ "HOURS": tokens.TokenType.VAR,
+ "DAY": tokens.TokenType.VAR,
+ "DAYS": tokens.TokenType.VAR,
+ "MONTH": tokens.TokenType.VAR,
+ "MONTHS": tokens.TokenType.VAR,
+ "YEAR": tokens.TokenType.VAR,
+ "YEARS": tokens.TokenType.VAR,
+ }
+
+ class Parser(Postgres.Parser):
+ """DB2 SQL parser with support for labeled durations."""
+
+ def _parse_term(self) -> exp.Expression | None:
+ """
+ Override term parsing to support DB2 labeled durations.
+
+ This is called during expression parsing for addition/subtraction
+ operations. We intercept patterns like `expr + 1 DAYS` and parse
them
+ specially.
+ """
+ this = self._parse_factor()
+ if not this:
+ return None
+
+ while self._match_set((tokens.TokenType.PLUS,
tokens.TokenType.DASH)):
+ op = self._prev.token_type
+
+ # Parse the right side of the + or -
+ rhs = self._parse_factor()
+ if not rhs: # pragma: no cover
+ break
+
+ # Check if there's a time unit after the right side
+ # This handles patterns like: expr + 1 DAYS, expr + (func())
DAYS
+ if (
+ self._curr
+ and self._curr.token_type == tokens.TokenType.VAR
+ and self._curr.text.upper()
+ in {
+ "MICROSECOND",
+ "MICROSECONDS",
+ "SECOND",
+ "SECONDS",
+ "MINUTE",
+ "MINUTES",
+ "HOUR",
+ "HOURS",
+ "DAY",
+ "DAYS",
+ "MONTH",
+ "MONTHS",
+ "YEAR",
+ "YEARS",
+ }
+ ):
+ # Found a DB2 labeled duration
+ unit_token = self._curr
+ self._advance()
+
+ duration = DB2Interval(
+ this=rhs,
+ unit=exp.Literal.string(unit_token.text.upper()),
+ )
+
+ if op == tokens.TokenType.PLUS:
+ this = exp.Add(this=this, expression=duration)
+ else:
+ this = exp.Sub(this=this, expression=duration)
+ else:
+ # Not a labeled duration - use normal Add/Sub
+ if op == tokens.TokenType.PLUS:
+ this = exp.Add(this=this, expression=rhs)
+ else:
+ this = exp.Sub(this=this, expression=rhs)
+
+ return this
+
+ class Generator(Postgres.Generator):
+ """DB2 SQL generator."""
+
+ TRANSFORMS = {
+ **Postgres.Generator.TRANSFORMS,
+ }
+
+ def db2interval_sql(self, expression: DB2Interval) -> str:
+ """Generate SQL for DB2Interval expressions."""
+ # Don't quote the unit (DAYS, MONTHS, etc.) - it's a keyword, not
a string
+ unit = expression.args["unit"]
+ unit_text = (
+ unit.this if isinstance(unit, exp.Literal) else
str(unit).upper()
+ )
+ return f"{self.sql(expression, 'this')} {unit_text}"
diff --git a/superset/sql/parse.py b/superset/sql/parse.py
index 4d4a72d293..ba4d288a97 100644
--- a/superset/sql/parse.py
+++ b/superset/sql/parse.py
@@ -45,7 +45,7 @@ from sqlglot.optimizer.scope import (
)
from superset.exceptions import QueryClauseValidationException,
SupersetParseError
-from superset.sql.dialects import Dremio, Firebolt, Pinot
+from superset.sql.dialects import DB2, Dremio, Firebolt, Pinot
if TYPE_CHECKING:
from superset.models.core import Database
@@ -67,7 +67,7 @@ SQLGLOT_DIALECTS = {
# "crate": ???
# "databend": ???
"databricks": Dialects.DATABRICKS,
- # "db2": ???
+ "db2": DB2,
# "denodo": ???
"dremio": Dremio,
"drill": Dialects.DRILL,
diff --git a/tests/unit_tests/sql/dialects/db2_tests.py
b/tests/unit_tests/sql/dialects/db2_tests.py
new file mode 100644
index 0000000000..b8add1d65b
--- /dev/null
+++ b/tests/unit_tests/sql/dialects/db2_tests.py
@@ -0,0 +1,246 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+from sqlglot import errors, parse_one
+
+from superset.sql.dialects.db2 import DB2
+
+
+def test_month_truncation() -> None:
+ """
+ Test the month truncation pattern from Db2EngineSpec time grains.
+ """
+ sql = """
+SELECT "DATE" - (DAY("DATE")-1) DAYS AS "DATE", sum("TOTAL_FEE") AS
"SUM(TOTAL_FEE)"
+ """
+
+ # test with the generic dialect -- raises exception
+ with pytest.raises(errors.ParseError):
+ parse_one(sql)
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == (
+ 'SELECT "DATE" - (DAY("DATE") - 1) DAYS AS "DATE", '
+ 'SUM("TOTAL_FEE") AS "SUM(TOTAL_FEE)"'
+ )
+
+
+def test_labeled_duration_with_day_function() -> None:
+ """
+ Test labeled duration with DAY function.
+ """
+ sql = "SELECT CURRENT_DATE - DAY(CURRENT_DATE) DAYS"
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == "SELECT CURRENT_DATE - DAY(CURRENT_DATE) DAYS"
+
+
+def test_labeled_duration_with_expression() -> None:
+ """
+ Test labeled duration with complex expressions (from real DB2 queries).
+ """
+ sql = 'SELECT "DATE" - (DAY("DATE") - 1) DAYS'
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == 'SELECT "DATE" - (DAY("DATE") - 1) DAYS'
+
+
+def test_labeled_duration_with_month_function() -> None:
+ """
+ Test labeled duration with MONTH function.
+ """
+ sql = 'SELECT "DATE" - (MONTH("DATE") - 1) MONTHS'
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == 'SELECT "DATE" - (MONTH("DATE") - 1) MONTHS'
+
+
+def test_year_truncation() -> None:
+ """
+ Test the year truncation pattern from Db2EngineSpec time grains.
+ """
+ sql = 'SELECT "DATE" - (DAY("DATE")-1) DAYS - (MONTH("DATE")-1) MONTHS'
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == (
+ 'SELECT "DATE" - (DAY("DATE") - 1) DAYS - (MONTH("DATE") - 1) MONTHS'
+ )
+
+
+def test_quarter_truncation() -> None:
+ """
+ Test the quarter truncation pattern from Db2EngineSpec time grains.
+ """
+ sql = (
+ 'SELECT "DATE" - (DAY("DATE")-1) DAYS - (MONTH("DATE")-1) MONTHS'
+ ' + ((QUARTER("DATE")-1) * 3) MONTHS'
+ )
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == (
+ 'SELECT "DATE" - (DAY("DATE") - 1) DAYS - (MONTH("DATE") - 1) MONTHS'
+ ' + ((QUARTER("DATE") - 1) * 3) MONTHS'
+ )
+
+
+def test_regular_column_aliasing_still_works() -> None:
+ """
+ Test that regular column aliasing still works (regression test).
+ """
+ sql = "SELECT col1 AS days FROM table"
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == "SELECT col1 AS days FROM table"
+
+
[email protected](
+ "sql, expected",
+ [
+ ("SELECT col1 AS day", "SELECT col1 AS day"),
+ ("SELECT col1 AS month", "SELECT col1 AS month"),
+ ("SELECT col1 AS year", "SELECT col1 AS year"),
+ ("SELECT col1 AS days", "SELECT col1 AS days"),
+ ("SELECT col1 AS months", "SELECT col1 AS months"),
+ ("SELECT col1 AS years", "SELECT col1 AS years"),
+ ],
+)
+def test_column_aliasing_with_reserved_words(sql: str, expected: str) -> None:
+ """
+ Test column aliasing with DB2 time unit words.
+ """
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+ assert regenerated == expected
+
+
[email protected](
+ "sql, expected",
+ [
+ # Function-based patterns
+ ('SELECT "DATE" - DAY("DATE") DAYS', 'SELECT "DATE" - DAY("DATE")
DAYS'),
+ (
+ 'SELECT "DATE" + MONTH("DATE") MONTHS',
+ 'SELECT "DATE" + MONTH("DATE") MONTHS',
+ ),
+ ('SELECT "DATE" - YEAR("DATE") YEARS', 'SELECT "DATE" - YEAR("DATE")
YEARS'),
+ # Complex expression patterns
+ (
+ 'SELECT "DATE" - (DAY("DATE") - 1) DAYS',
+ 'SELECT "DATE" - (DAY("DATE") - 1) DAYS',
+ ),
+ (
+ 'SELECT "DATE" + (MONTH("DATE") + 2) MONTHS',
+ 'SELECT "DATE" + (MONTH("DATE") + 2) MONTHS',
+ ),
+ # Nested expressions
+ (
+ 'SELECT "DATE" - ((DAY("DATE") - 1) + 1) DAYS - (MONTH("DATE") -
1) MONTHS',
+ 'SELECT "DATE" - ((DAY("DATE") - 1) + 1) DAYS - (MONTH("DATE") -
1) MONTHS',
+ ),
+ ],
+)
+def test_labeled_duration_variations(sql: str, expected: str) -> None:
+ """
+ Test various labeled duration patterns that should work.
+ """
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+ assert regenerated == expected
+
+
+def test_addition_with_labeled_duration() -> None:
+ """
+ Test addition operations with labeled durations.
+ """
+ sql = 'SELECT "DATE" + (DAY("DATE") + 5) DAYS'
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == 'SELECT "DATE" + (DAY("DATE") + 5) DAYS'
+
+
+def test_arithmetic_with_different_units() -> None:
+ """
+ Test arithmetic operations mixing different time units.
+ """
+ sql = (
+ 'SELECT "DATE" - (DAY("DATE")-1) DAYS '
+ '- (MONTH("DATE")-1) MONTHS + '
+ '(YEAR("DATE")-1) YEARS'
+ )
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == (
+ 'SELECT "DATE" - (DAY("DATE") - 1) DAYS - '
+ '(MONTH("DATE") - 1) MONTHS + '
+ '(YEAR("DATE") - 1) YEARS'
+ )
+
+
+def test_multiple_function_calls_in_duration() -> None:
+ """
+ Test labeled duration with multiple function calls.
+ """
+ sql = 'SELECT "DATE" - (DAY("DATE") + MONTH("DATE")) DAYS'
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == 'SELECT "DATE" - (DAY("DATE") + MONTH("DATE")) DAYS'
+
+
+def test_labeled_duration_with_multiplication() -> None:
+ """
+ Test labeled duration with multiplication in the expression.
+ """
+ sql = 'SELECT "DATE" + ((QUARTER("DATE") - 1) * 3) MONTHS'
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ assert regenerated == 'SELECT "DATE" + ((QUARTER("DATE") - 1) * 3) MONTHS'
+
+
+def test_column_plus_literal_duration() -> None:
+ """
+ Test column + literal number with time unit.
+ """
+ sql = "SELECT col + 1 DAYS FROM t"
+
+ ast = parse_one(sql, dialect=DB2)
+ regenerated = ast.sql(dialect=DB2)
+
+ # Should parse as (col + 1 DAYS), not (col + 1) AS DAYS
+ assert regenerated == "SELECT col + 1 DAYS FROM t"