This is an automated email from the ASF dual-hosted git repository. beto pushed a commit to branch pinot-dialect-sqlglot in repository https://gitbox.apache.org/repos/asf/superset.git
commit 220f176fd86798299509fceb95055da74c59e629 Author: Beto Dealmeida <[email protected]> AuthorDate: Mon Sep 29 15:56:22 2025 -0400 feat: sqlglot dialect for Pinot --- superset/sql/dialects/__init__.py | 3 +- superset/sql/dialects/{__init__.py => pinot.py} | 26 +- superset/sql/parse.py | 4 +- tests/unit_tests/sql/dialects/pinot_tests.py | 337 ++++++++++++++++++++++++ 4 files changed, 364 insertions(+), 6 deletions(-) diff --git a/superset/sql/dialects/__init__.py b/superset/sql/dialects/__init__.py index 3b43b15bec..f4d56e17e6 100644 --- a/superset/sql/dialects/__init__.py +++ b/superset/sql/dialects/__init__.py @@ -17,5 +17,6 @@ from .dremio import Dremio from .firebolt import Firebolt, FireboltOld +from .pinot import Pinot -__all__ = ["Dremio", "Firebolt", "FireboltOld"] +__all__ = ["Dremio", "Firebolt", "FireboltOld", "Pinot"] diff --git a/superset/sql/dialects/__init__.py b/superset/sql/dialects/pinot.py similarity index 50% copy from superset/sql/dialects/__init__.py copy to superset/sql/dialects/pinot.py index 3b43b15bec..d11c07d611 100644 --- a/superset/sql/dialects/__init__.py +++ b/superset/sql/dialects/pinot.py @@ -15,7 +15,27 @@ # specific language governing permissions and limitations # under the License. -from .dremio import Dremio -from .firebolt import Firebolt, FireboltOld +""" +MySQL ANSI dialect for Apache Pinot. -__all__ = ["Dremio", "Firebolt", "FireboltOld"] +This dialect is based on MySQL but follows ANSI SQL quoting conventions where +double quotes are used for identifiers instead of string literals. +""" + +from __future__ import annotations + +from sqlglot.dialects.mysql import MySQL + + +class Pinot(MySQL): + """ + MySQL ANSI dialect used by Apache Pinot. + + The main difference from standard MySQL is that double quotes (") are used for + identifiers instead of string literals, following ANSI SQL conventions. + """ + + class Tokenizer(MySQL.Tokenizer): + QUOTES = ["'"] # Only single quotes for strings + IDENTIFIERS = ['"', "`"] # Backticks and double quotes for identifiers + STRING_ESCAPES = ["'", "\\"] # Remove double quote from string escapes diff --git a/superset/sql/parse.py b/superset/sql/parse.py index a2b8437698..822b8ec79b 100644 --- a/superset/sql/parse.py +++ b/superset/sql/parse.py @@ -44,7 +44,7 @@ from sqlglot.optimizer.scope import ( ) from superset.exceptions import QueryClauseValidationException, SupersetParseError -from superset.sql.dialects import Dremio, Firebolt +from superset.sql.dialects import Dremio, Firebolt, Pinot if TYPE_CHECKING: from superset.models.core import Database @@ -94,7 +94,7 @@ SQLGLOT_DIALECTS = { # "odelasticsearch": ??? "oracle": Dialects.ORACLE, "parseable": Dialects.POSTGRES, - "pinot": Dialects.MYSQL, + "pinot": Pinot, "postgresql": Dialects.POSTGRES, "presto": Dialects.PRESTO, "pydoris": Dialects.DORIS, diff --git a/tests/unit_tests/sql/dialects/pinot_tests.py b/tests/unit_tests/sql/dialects/pinot_tests.py new file mode 100644 index 0000000000..246ab9ffc4 --- /dev/null +++ b/tests/unit_tests/sql/dialects/pinot_tests.py @@ -0,0 +1,337 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +import sqlglot + +from superset.sql.dialects.pinot import Pinot + + +def test_double_quotes_as_identifiers() -> None: + """ + Test that double quotes are treated as identifiers, not string literals. + """ + sql = 'SELECT "column_name" FROM "table_name"' + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT + "column_name" +FROM "table_name" + """.strip() + ) + + +def test_single_quotes_for_strings() -> None: + """ + Test that single quotes are used for string literals. + """ + sql = "SELECT * FROM users WHERE name = 'John'" + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT + * +FROM users +WHERE + name = 'John' + """.strip() + ) + + +def test_backticks_as_identifiers() -> None: + """ + Test that backticks work as identifiers (MySQL-style). + """ + sql = "SELECT `column_name` FROM `table_name`" + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT + `column_name` +FROM `table_name` + """.strip() + ) + + +def test_mixed_identifier_quotes() -> None: + """ + Test mixing double quotes and backticks for identifiers. + """ + sql = ( + 'SELECT "col1", `col2` FROM "table1" JOIN `table2` ON "table1".id = `table2`.id' + ) + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT + "col1", + `col2` +FROM "table1" +JOIN `table2` + ON "table1".id = `table2`.id + """.strip() + ) + + +def test_string_with_escaped_quotes() -> None: + """ + Test string literals with escaped single quotes. + """ + sql = "SELECT * FROM users WHERE name = 'O''Brien'" + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT + * +FROM users +WHERE + name = 'O''Brien' + """.strip() + ) + + +def test_string_with_backslash_escape() -> None: + """ + Test string literals with backslash escapes. + """ + sql = r"SELECT * FROM users WHERE path = 'C:\\Users\\John'" + ast = sqlglot.parse_one(sql, Pinot) + + generated = Pinot().generate(expression=ast, pretty=True) + assert "WHERE" in generated + assert "path" in generated + + [email protected]( + "sql, expected", + [ + ( + 'SELECT COUNT(*) FROM "events" WHERE "type" = \'click\'', + """ +SELECT + COUNT(*) +FROM "events" +WHERE + "type" = 'click' + """.strip(), + ), + ( + 'SELECT "user_id", SUM("amount") FROM "transactions" GROUP BY "user_id"', + """ +SELECT + "user_id", + SUM("amount") +FROM "transactions" +GROUP BY + "user_id" + """.strip(), + ), + ( + "SELECT * FROM \"orders\" WHERE \"status\" IN ('pending', 'shipped')", + """ +SELECT + * +FROM "orders" +WHERE + "status" IN ('pending', 'shipped') + """.strip(), + ), + ], +) +def test_various_queries(sql: str, expected: str) -> None: + """ + Test various SQL queries with Pinot dialect. + """ + ast = sqlglot.parse_one(sql, Pinot) + assert Pinot().generate(expression=ast, pretty=True) == expected + + +def test_aggregate_functions() -> None: + """ + Test aggregate functions with quoted identifiers. + """ + sql = """ +SELECT + "category", + COUNT(*), + AVG("price"), + MAX("quantity") +FROM "products" +GROUP BY "category" + """ + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT + "category", + COUNT(*), + AVG("price"), + MAX("quantity") +FROM "products" +GROUP BY + "category" + """.strip() + ) + + +def test_join_with_quoted_identifiers() -> None: + """ + Test JOIN operations with double-quoted identifiers. + """ + sql = """ + SELECT "u"."name", "o"."total" + FROM "users" AS "u" + JOIN "orders" AS "o" ON "u"."id" = "o"."user_id" + """ + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT + "u"."name", + "o"."total" +FROM "users" AS "u" +JOIN "orders" AS "o" + ON "u"."id" = "o"."user_id" + """.strip() + ) + + +def test_subquery_with_quoted_identifiers() -> None: + """ + Test subqueries with double-quoted identifiers. + """ + sql = 'SELECT * FROM (SELECT "id", "name" FROM "users") AS "subquery"' + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT + * +FROM ( + SELECT + "id", + "name" + FROM "users" +) AS "subquery" + """.strip() + ) + + +def test_case_expression() -> None: + """ + Test CASE expressions with quoted identifiers. + """ + sql = """ + SELECT "name", + CASE WHEN "age" < 18 THEN 'minor' + WHEN "age" >= 18 THEN 'adult' + END AS "category" + FROM "persons" + """ + ast = sqlglot.parse_one(sql, Pinot) + + generated = Pinot().generate(expression=ast, pretty=True) + assert '"name"' in generated + assert '"age"' in generated + assert '"category"' in generated + assert "'minor'" in generated + assert "'adult'" in generated + + +def test_cte_with_quoted_identifiers() -> None: + """ + Test Common Table Expressions (CTE) with quoted identifiers. + """ + sql = """ + WITH "high_value_orders" AS ( + SELECT * FROM "orders" WHERE "total" > 1000 + ) + SELECT "customer_id", COUNT(*) FROM "high_value_orders" GROUP BY "customer_id" + """ + ast = sqlglot.parse_one(sql, Pinot) + + generated = Pinot().generate(expression=ast, pretty=True) + assert 'WITH "high_value_orders" AS' in generated + assert '"orders"' in generated + assert '"total"' in generated + assert '"customer_id"' in generated + + +def test_order_by_with_quoted_identifiers() -> None: + """ + Test ORDER BY clause with quoted identifiers. + """ + sql = 'SELECT "name", "salary" FROM "employees" ORDER BY "salary" DESC, "name" ASC' + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT + "name", + "salary" +FROM "employees" +ORDER BY + "salary" DESC, + "name" + """.strip() + ) + + +def test_limit_and_offset() -> None: + """ + Test LIMIT and OFFSET clauses. + """ + sql = 'SELECT * FROM "products" LIMIT 10 OFFSET 20' + ast = sqlglot.parse_one(sql, Pinot) + + generated = Pinot().generate(expression=ast, pretty=True) + assert '"products"' in generated + assert "LIMIT 10" in generated + + +def test_distinct() -> None: + """ + Test DISTINCT keyword with quoted identifiers. + """ + sql = 'SELECT DISTINCT "category" FROM "products"' + ast = sqlglot.parse_one(sql, Pinot) + + assert ( + Pinot().generate(expression=ast, pretty=True) + == """ +SELECT DISTINCT + "category" +FROM "products" + """.strip() + )
