This is an automated email from the ASF dual-hosted git repository.

beto pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new bf88d9bb1c fix(pinot): restrict types in dialect (#35337)
bf88d9bb1c is described below

commit bf88d9bb1c20a760c2fa2da8c01cd737d6a00128
Author: Beto Dealmeida <[email protected]>
AuthorDate: Tue Sep 30 16:34:53 2025 -0400

    fix(pinot): restrict types in dialect (#35337)
---
 superset/sql/dialects/pinot.py               | 54 ++++++++++++++++++++
 tests/unit_tests/sql/dialects/pinot_tests.py | 75 ++++++++++++++++++++++++++++
 2 files changed, 129 insertions(+)

diff --git a/superset/sql/dialects/pinot.py b/superset/sql/dialects/pinot.py
index e8804b2ee8..05d32f004b 100644
--- a/superset/sql/dialects/pinot.py
+++ b/superset/sql/dialects/pinot.py
@@ -24,7 +24,9 @@ double quotes are used for identifiers instead of string literals.
 
 from __future__ import annotations
 
+from sqlglot import exp
 from sqlglot.dialects.mysql import MySQL
+from sqlglot.tokens import TokenType
 
 
 class Pinot(MySQL):
@@ -41,3 +43,55 @@ class Pinot(MySQL):
         QUOTES = ["'"]  # Only single quotes for strings
         IDENTIFIERS = ['"', "`"]  # Backticks and double quotes for identifiers
         STRING_ESCAPES = ["'", "\\"]  # Remove double quote from string escapes
+        KEYWORDS = {
+            **MySQL.Tokenizer.KEYWORDS,
+            "STRING": TokenType.TEXT,
+            "LONG": TokenType.BIGINT,
+            "BYTES": TokenType.VARBINARY,
+        }
+
+    class Generator(MySQL.Generator):
+        TYPE_MAPPING = {
+            **MySQL.Generator.TYPE_MAPPING,
+            exp.DataType.Type.TINYINT: "INT",
+            exp.DataType.Type.SMALLINT: "INT",
+            exp.DataType.Type.INT: "INT",
+            exp.DataType.Type.BIGINT: "LONG",
+            exp.DataType.Type.FLOAT: "FLOAT",
+            exp.DataType.Type.DOUBLE: "DOUBLE",
+            exp.DataType.Type.BOOLEAN: "BOOLEAN",
+            exp.DataType.Type.TIMESTAMP: "TIMESTAMP",
+            exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
+            exp.DataType.Type.VARCHAR: "STRING",
+            exp.DataType.Type.CHAR: "STRING",
+            exp.DataType.Type.TEXT: "STRING",
+            exp.DataType.Type.BINARY: "BYTES",
+            exp.DataType.Type.VARBINARY: "BYTES",
+            exp.DataType.Type.JSON: "JSON",
+        }
+
+        # Override MySQL's CAST_MAPPING - don't convert integer or string types
+        CAST_MAPPING = {
+            exp.DataType.Type.LONGBLOB: exp.DataType.Type.VARBINARY,
+            exp.DataType.Type.MEDIUMBLOB: exp.DataType.Type.VARBINARY,
+            exp.DataType.Type.TINYBLOB: exp.DataType.Type.VARBINARY,
+            exp.DataType.Type.UBIGINT: "UNSIGNED",
+        }
+
+        def datatype_sql(self, expression: exp.DataType) -> str:
+            # Don't use MySQL's VARCHAR size requirement logic
+            # Just use TYPE_MAPPING for all types
+            type_value = expression.this
+            type_sql = (
+                self.TYPE_MAPPING.get(type_value, type_value.value)
+                if isinstance(type_value, exp.DataType.Type)
+                else type_value
+            )
+
+            interior = self.expressions(expression, flat=True)
+            nested = f"({interior})" if interior else ""
+
+            if expression.this in self.UNSIGNED_TYPE_MAPPING:
+                return f"{type_sql} UNSIGNED{nested}"
+
+            return f"{type_sql}{nested}"
diff --git a/tests/unit_tests/sql/dialects/pinot_tests.py b/tests/unit_tests/sql/dialects/pinot_tests.py
index 4d7eed7154..f1a6cfb729 100644
--- a/tests/unit_tests/sql/dialects/pinot_tests.py
+++ b/tests/unit_tests/sql/dialects/pinot_tests.py
@@ -346,3 +346,78 @@ SELECT DISTINCT
 FROM "products"
         """.strip()
     )
+
+
+def test_cast_to_string() -> None:
+    """
+    Test that CAST to STRING is preserved (not converted to CHAR).
+    """
+    sql = "SELECT CAST(cohort_size AS STRING) FROM table"
+    ast = sqlglot.parse_one(sql, Pinot)
+    generated = Pinot().generate(expression=ast)
+
+    assert "STRING" in generated
+    assert "CHAR" not in generated
+
+
+def test_concat_with_cast_string() -> None:
+    """
+    Test CONCAT with CAST to STRING - verifies the original issue is fixed.
+    """
+    sql = """
+SELECT concat(a, cast(b AS string), ' - ')
+FROM "default".c"""
+    ast = sqlglot.parse_one(sql, Pinot)
+    generated = Pinot().generate(expression=ast)
+
+    # Verify STRING type is preserved (not converted to CHAR)
+    assert "STRING" in generated or "string" in generated.lower()
+    assert "CHAR" not in generated
+
+
[email protected](
+    "cast_type, expected_type",
+    [
+        ("INT", "INT"),
+        ("TINYINT", "INT"),
+        ("SMALLINT", "INT"),
+        ("BIGINT", "LONG"),
+        ("LONG", "LONG"),
+        ("FLOAT", "FLOAT"),
+        ("DOUBLE", "DOUBLE"),
+        ("BOOLEAN", "BOOLEAN"),
+        ("TIMESTAMP", "TIMESTAMP"),
+        ("STRING", "STRING"),
+        ("VARCHAR", "STRING"),
+        ("CHAR", "STRING"),
+        ("TEXT", "STRING"),
+        ("BYTES", "BYTES"),
+        ("BINARY", "BYTES"),
+        ("VARBINARY", "BYTES"),
+        ("JSON", "JSON"),
+    ],
+)
+def test_type_mappings(cast_type: str, expected_type: str) -> None:
+    """
+    Test that Pinot type mappings work correctly for all basic types.
+    """
+    sql = f"SELECT CAST(col AS {cast_type}) FROM table"  # noqa: S608
+    ast = sqlglot.parse_one(sql, Pinot)
+    generated = Pinot().generate(expression=ast)
+
+    assert expected_type in generated
+
+
+def test_unsigned_type() -> None:
+    """
+    Test that unsigned integer types are handled correctly.
+    Tests the UNSIGNED_TYPE_MAPPING path in datatype_sql method.
+    """
+    from sqlglot import exp
+
+    # Create a UBIGINT DataType which is in UNSIGNED_TYPE_MAPPING
+    dt = exp.DataType(this=exp.DataType.Type.UBIGINT)
+    result = Pinot.Generator().datatype_sql(dt)
+
+    assert "UNSIGNED" in result
+    assert "BIGINT" in result

Reply via email to