rusackas commented on code in PR #41125:
URL: https://github.com/apache/superset/pull/41125#discussion_r3464630948
##########
tests/unit_tests/sql/parse_tests.py:
##########
@@ -3208,6 +3213,54 @@ def test_sanitize_clause(sql: str, expected: str |
Exception, engine: str) -> No
sanitize_clause(sql, engine)
[email protected](
+ "engine",
+ ["postgresql", "redshift", "cockroachdb", "netezza", "hana", "base",
"mysql"],
+)
+def test_sanitize_clause_preserves_aggregation_semantics(engine: str) -> None:
+ """
+ Regression test for https://github.com/apache/superset/issues/36113.
+
+ `sanitize_clause` must not silently rewrite a user-authored expression. The
+ Postgres SQLGlot dialect (which several engines borrow) rewrites
+ ``ROUND(AVG(x), n)`` to ``ROUND(CAST(AVG(x) AS DECIMAL), n)`` at generation
+ time. On engines whose unqualified ``DECIMAL`` defaults to scale 0 (e.g.
+ Redshift, Netezza) the injected cast rounds the aggregate to an integer
+ *before* the explicit ``ROUND``, producing wrong results.
+
+ The clause must be returned unchanged regardless of the engine dialect.
+ """
+ clause = "ROUND(AVG(col), 4)"
+ sanitized = sanitize_clause(clause, engine)
+ assert "CAST" not in sanitized.upper(), (
+ f"sanitize_clause injected a cast for engine {engine!r}: {sanitized!r}"
+ )
+ assert sanitized == clause
+
+
[email protected](
+ "engine",
+ ["postgresql", "redshift", "cockroachdb", "netezza", "hana", "base",
"mysql"],
+)
+def test_sanitize_clause_preserves_aggregation_semantics_with_comment(
+ engine: str,
+) -> None:
+ """
+ Regression test for https://github.com/apache/superset/issues/36113.
+
+ A clause that contains a comment takes the re-rendering branch of
+ ``sanitize_clause``. That branch must normalize comments using the *base*
+ dialect rather than the engine dialect, so it must not re-apply the
Postgres
+ ``ROUND(AVG(x), n)`` -> ``ROUND(CAST(AVG(x) AS DECIMAL), n)`` rewrite that
+ truncates results on engines where ``DECIMAL`` defaults to scale 0.
+ """
+ clause = "ROUND(AVG(col), 4) /* precise_count_distinct=true */"
+ sanitized = sanitize_clause(clause, engine)
+ assert "CAST" not in sanitized.upper(), (
+ f"sanitize_clause injected a cast for engine {engine!r}: {sanitized!r}"
Review Comment:
Good catch — added assertions that both the `ROUND(AVG(col), 4)` expression
and the comment payload survive verbatim, so a drop/rewrite would now fail the
test.
##########
tests/integration_tests/sqla_models_tests.py:
##########
@@ -199,8 +199,8 @@ def test_jinja_metrics_and_calc_columns(self,
mock_username):
assert "'foo_P1D'" in query
# assert dataset saved metric
assert "count('bar_P1D')" in query
- # assert adhoc metric
- assert "SUM(CASE WHEN user = 'user_abc' THEN 1 ELSE 0 END)" in query
+ # assert adhoc metric (sanitize_clause preserves the user's SQL
verbatim)
+ assert "SUM(case when user = 'user_abc' then 1 else 0 end)" in query
Review Comment:
Added the type hints on the signature.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]