(superset) 01/01: fix: don't strip SQL comments in Explore - 2nd try

maximebeauchemin Tue, 28 May 2024 17:55:51 -0700

This is an automated email from the ASF dual-hosted git repository.

maximebeauchemin pushed a commit to branch rerevert
in repository https://gitbox.apache.org/repos/asf/superset.git


commit 87591a0f2bb657b15f52436e45e846163f8b8f8a
Author: Maxime Beauchemin <[email protected]>
AuthorDate: Tue May 28 17:37:55 2024 -0700

    fix: don't strip SQL comments in Explore - 2nd try
    
    Here I'm trying to recreate the issue that led to revert #28363 in 
https://github.com/apache/superset/pull/28567, likely some sort of trailing 
comment of some kind. This is DRAFT for now until I can reproduce the issue, 
write a covering test for it, and address it.
    
    First attempt at creating a problematic virtual dataset was the following ->
---
 superset/db_engine_specs/base.py            | 5 ++---
 superset/models/helpers.py                  | 7 ++++++-
 superset/sqllab/query_render.py             | 1 -
 tests/integration_tests/conftest.py         | 1 +
 tests/integration_tests/core_tests.py       | 4 +++-
 tests/integration_tests/datasource_tests.py | 8 +++++---
 tests/integration_tests/sqllab_tests.py     | 4 ++--
 7 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index 6df0dc61aa..60bfd444f3 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -1133,9 +1133,8 @@ class BaseEngineSpec:  # pylint: 
disable=too-many-public-methods
         cte = None
         sql_remainder = None
         sql = sql.strip(" \t\n;")
-        sql_statement = sqlparse.format(sql, strip_comments=True)
         query_limit: int | None = sql_parse.extract_top_from_query(
-            sql_statement, cls.top_keywords
+            sql, cls.top_keywords
         )
         if not limit:
             final_limit = query_limit
@@ -1144,7 +1143,7 @@ class BaseEngineSpec:  # pylint: 
disable=too-many-public-methods
         else:
             final_limit = limit
         if not cls.allows_cte_in_subquery:
-            cte, sql_remainder = 
sql_parse.get_cte_remainder_query(sql_statement)
+            cte, sql_remainder = sql_parse.get_cte_remainder_query(sql)
         if cte:
             str_statement = str(sql_remainder)
             cte = cte + "\n"
diff --git a/superset/models/helpers.py b/superset/models/helpers.py
index dffb285663..f7c350edc8 100644
--- a/superset/models/helpers.py
+++ b/superset/models/helpers.py
@@ -1086,7 +1086,7 @@ class ExploreMixin:  # pylint: 
disable=too-many-public-methods
                 _("Virtual dataset query cannot consist of multiple 
statements")
             )
 
-        sql = script.statements[0].format(comments=False)
+        sql = script.statements[0].format()
         if not sql:
             raise QueryObjectValidationError(_("Virtual dataset query cannot 
be empty"))
         return sql
@@ -1104,6 +1104,11 @@ class ExploreMixin:  # pylint: 
disable=too-many-public-methods
         """
 
         from_sql = self.get_rendered_sql(template_processor)
+
+        # TEST this SOLUTION (?)
+        # Add a line break in case last line happens to be a comment
+        # from_sql = from_sql + '\n'
+
         parsed_query = ParsedQuery(from_sql, engine=self.db_engine_spec.engine)
         if not (
             parsed_query.is_unknown()
diff --git a/superset/sqllab/query_render.py b/superset/sqllab/query_render.py
index caf9a3cb2b..67bea2b1fb 100644
--- a/superset/sqllab/query_render.py
+++ b/superset/sqllab/query_render.py
@@ -60,7 +60,6 @@ class SqlQueryRenderImpl(SqlQueryRender):
 
             parsed_query = ParsedQuery(
                 query_model.sql,
-                strip_comments=True,
                 engine=query_model.database.db_engine_spec.engine,
             )
             rendered_query = sql_template_processor.process_template(
diff --git a/tests/integration_tests/conftest.py 
b/tests/integration_tests/conftest.py
index 84c5793105..5e794c4032 100644
--- a/tests/integration_tests/conftest.py
+++ b/tests/integration_tests/conftest.py
@@ -304,6 +304,7 @@ def virtual_dataset():
             "SELECT 3 as col1, 'd' as col2, 1.3, NULL, '2000-01-04 00:00:00', 
4 "
             "UNION ALL "
             "SELECT 4 as col1, 'e' as col2, 1.4, NULL, '2000-01-05 00:00:00', 
5 "
+            "\n /* CONTAINS A RANDOM COMMENT */ \n"
             "UNION ALL "
             "SELECT 5 as col1, 'f' as col2, 1.5, NULL, '2000-01-06 00:00:00', 
6 "
             "UNION ALL "
diff --git a/tests/integration_tests/core_tests.py 
b/tests/integration_tests/core_tests.py
index 923fe4a6b9..af8a20ec61 100644
--- a/tests/integration_tests/core_tests.py
+++ b/tests/integration_tests/core_tests.py
@@ -539,7 +539,9 @@ class TestCore(SupersetTestCase):
             database=get_example_database(),
         )
         rendered_query = str(table.get_from_clause()[0])
-        self.assertEqual(clean_query, rendered_query)
+        assert "comment 1" in rendered_query
+        assert "comment 2" in rendered_query
+        assert "FROM tbl" in rendered_query
 
     def test_slice_payload_no_datasource(self):
         form_data = {
diff --git a/tests/integration_tests/datasource_tests.py 
b/tests/integration_tests/datasource_tests.py
index 1b7fcb733b..718b6d2d98 100644
--- a/tests/integration_tests/datasource_tests.py
+++ b/tests/integration_tests/datasource_tests.py
@@ -538,10 +538,12 @@ def test_get_samples(test_client, login_as_admin, 
virtual_dataset):
     assert "coltypes" in rv2.json["result"]
     assert "data" in rv2.json["result"]
 
-    eager_samples = virtual_dataset.database.get_df(
-        f"select * from ({virtual_dataset.sql}) as tbl"
-        f' limit {app.config["SAMPLES_ROW_LIMIT"]}'
+    sql = (
+        f"select * from ({virtual_dataset.sql}) as tbl "
+        f'limit {app.config["SAMPLES_ROW_LIMIT"]}'
     )
+    eager_samples = virtual_dataset.database.get_df(sql)
+
     # the col3 is Decimal
     eager_samples["col3"] = eager_samples["col3"].apply(float)
     eager_samples = eager_samples.to_dict(orient="records")
diff --git a/tests/integration_tests/sqllab_tests.py 
b/tests/integration_tests/sqllab_tests.py
index 3602097b20..3f5a211db3 100644
--- a/tests/integration_tests/sqllab_tests.py
+++ b/tests/integration_tests/sqllab_tests.py
@@ -573,9 +573,9 @@ class TestSqlLab(SupersetTestCase):
         assert data["status"] == "success"
 
         data = self.run_sql(
-            "SELECT * FROM birth_names WHERE state = '{{ state }}' -- blabblah 
{{ extra1 }} {{fake.fn()}}\nLIMIT 10",
+            "SELECT * FROM birth_names WHERE state = '{{ state }}' -- blabblah 
{{ extra1 }}\nLIMIT 10",
             "3",
-            template_params=json.dumps({"state": "CA"}),
+            template_params=json.dumps({"state": "CA", "extra1": "comment"}),
         )
         assert data["status"] == "success"

(superset) 01/01: fix: don't strip SQL comments in Explore - 2nd try

Reply via email to