This is an automated email from the ASF dual-hosted git repository.

villebro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 6ede3271ff fix(SQL Lab): hang when result set size is too big (#30522)
6ede3271ff is described below

commit 6ede3271ff3e4c82a53a08e0dd18b35e01c4fa4d
Author: anamitraadhikari <[email protected]>
AuthorDate: Mon Oct 14 18:03:28 2024 -0700

    fix(SQL Lab): hang when result set size is too big (#30522)
    
    Co-authored-by: aadhikari <[email protected]>
    Co-authored-by: Ville Brofeldt <[email protected]>
---
 docs/static/resources/openapi.json                 |   3 +-
 .../superset-ui-core/src/query/types/Query.ts      |   1 +
 superset-frontend/src/setup/setupErrorMessages.ts  |   4 +
 superset/config.py                                 |   3 +
 superset/errors.py                                 |   3 +
 superset/sql_lab.py                                |  36 +++++++
 tests/unit_tests/sql_lab_test.py                   | 115 ++++++++++++++++++++-
 7 files changed, 162 insertions(+), 3 deletions(-)

diff --git a/docs/static/resources/openapi.json 
b/docs/static/resources/openapi.json
index a039bd3a2a..43781ac965 100644
--- a/docs/static/resources/openapi.json
+++ b/docs/static/resources/openapi.json
@@ -116,7 +116,8 @@
                           "GENERIC_BACKEND_ERROR",
                           "INVALID_PAYLOAD_FORMAT_ERROR",
                           "INVALID_PAYLOAD_SCHEMA_ERROR",
-                          "REPORT_NOTIFICATION_ERROR"
+                          "REPORT_NOTIFICATION_ERROR",
+                          "RESULT_TOO_LARGE_ERROR"
                         ],
                         "type": "string"
                       },
diff --git 
a/superset-frontend/packages/superset-ui-core/src/query/types/Query.ts 
b/superset-frontend/packages/superset-ui-core/src/query/types/Query.ts
index 4feba17319..a83d17a937 100644
--- a/superset-frontend/packages/superset-ui-core/src/query/types/Query.ts
+++ b/superset-frontend/packages/superset-ui-core/src/query/types/Query.ts
@@ -227,6 +227,7 @@ export const ErrorTypeEnum = {
   ASYNC_WORKERS_ERROR: 'ASYNC_WORKERS_ERROR',
   ADHOC_SUBQUERY_NOT_ALLOWED_ERROR: 'ADHOC_SUBQUERY_NOT_ALLOWED_ERROR',
   INVALID_SQL_ERROR: 'INVALID_SQL_ERROR',
+  RESULT_TOO_LARGE_ERROR: 'RESULT_TOO_LARGE_ERROR',
 
   // Generic errors
   GENERIC_COMMAND_ERROR: 'GENERIC_COMMAND_ERROR',
diff --git a/superset-frontend/src/setup/setupErrorMessages.ts 
b/superset-frontend/src/setup/setupErrorMessages.ts
index 442cd32152..e4c2380150 100644
--- a/superset-frontend/src/setup/setupErrorMessages.ts
+++ b/superset-frontend/src/setup/setupErrorMessages.ts
@@ -159,5 +159,9 @@ export default function setupErrorMessages() {
     ErrorTypeEnum.INVALID_SQL_ERROR,
     InvalidSQLErrorMessage,
   );
+  errorMessageComponentRegistry.registerValue(
+    ErrorTypeEnum.RESULT_TOO_LARGE_ERROR,
+    DatabaseErrorMessage,
+  );
   setupErrorMessagesExtra();
 }
diff --git a/superset/config.py b/superset/config.py
index 7ae46bba76..d19e30a5a5 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -960,6 +960,9 @@ SUPERSET_META_DB_LIMIT: int | None = 1000
 SQLLAB_SAVE_WARNING_MESSAGE = None
 SQLLAB_SCHEDULE_WARNING_MESSAGE = None
 
+# Max payload size (MB) for SQL Lab to prevent browser hangs with large 
results.
+SQLLAB_PAYLOAD_MAX_MB = None
+
 # Force refresh while auto-refresh in dashboard
 DASHBOARD_AUTO_REFRESH_MODE: Literal["fetch", "force"] = "force"
 # Dashboard auto refresh intervals
diff --git a/superset/errors.py b/superset/errors.py
index 9f0f5bed05..4d9c08d142 100644
--- a/superset/errors.py
+++ b/superset/errors.py
@@ -87,6 +87,7 @@ class SupersetErrorType(StrEnum):
     ASYNC_WORKERS_ERROR = "ASYNC_WORKERS_ERROR"
     ADHOC_SUBQUERY_NOT_ALLOWED_ERROR = "ADHOC_SUBQUERY_NOT_ALLOWED_ERROR"
     INVALID_SQL_ERROR = "INVALID_SQL_ERROR"
+    RESULT_TOO_LARGE_ERROR = "RESULT_TOO_LARGE_ERROR"
 
     # Generic errors
     GENERIC_COMMAND_ERROR = "GENERIC_COMMAND_ERROR"
@@ -151,6 +152,7 @@ ISSUE_CODES = {
     1036: _("The database was deleted."),
     1037: _("Custom SQL fields cannot contain sub-queries."),
     1040: _("The submitted payload failed validation."),
+    1041: _("The result size exceeds the allowed limit."),
 }
 
 
@@ -190,6 +192,7 @@ ERROR_TYPES_TO_ISSUE_CODES_MAPPING = {
     SupersetErrorType.DATABASE_NOT_FOUND_ERROR: [1011, 1036],
     SupersetErrorType.CONNECTION_DATABASE_TIMEOUT: [1001, 1009],
     SupersetErrorType.MARSHMALLOW_ERROR: [1040],
+    SupersetErrorType.RESULT_TOO_LARGE_ERROR: [1041],
 }
 
 
diff --git a/superset/sql_lab.py b/superset/sql_lab.py
index 7685b4419c..88e1bc1aad 100644
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -17,6 +17,7 @@
 # pylint: disable=consider-using-transaction
 import dataclasses
 import logging
+import sys
 import uuid
 from contextlib import closing
 from datetime import datetime
@@ -78,6 +79,7 @@ SQL_MAX_ROW = config["SQL_MAX_ROW"]
 SQLLAB_CTAS_NO_LIMIT = config["SQLLAB_CTAS_NO_LIMIT"]
 log_query = config["QUERY_LOGGER"]
 logger = logging.getLogger(__name__)
+BYTES_IN_MB = 1024 * 1024
 
 
 class SqlLabException(Exception):
@@ -531,6 +533,7 @@ def execute_sql_statements(
                     log_params,
                     apply_ctas,
                 )
+
             except SqlLabQueryStoppedException:
                 payload.update({"status": QueryStatus.STOPPED})
                 return payload
@@ -601,6 +604,22 @@ def execute_sql_statements(
                 serialized_payload = _serialize_payload(
                     payload, cast(bool, results_backend_use_msgpack)
                 )
+
+                # Check the size of the serialized payload
+                if sql_lab_payload_max_mb := 
config.get("SQLLAB_PAYLOAD_MAX_MB"):
+                    serialized_payload_size = sys.getsizeof(serialized_payload)
+                    max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB
+
+                    if serialized_payload_size > max_bytes:
+                        logger.info("Result size exceeds the allowed limit.")
+                        raise SupersetErrorException(
+                            SupersetError(
+                                message=f"Result size 
({serialized_payload_size / BYTES_IN_MB:.2f} MB) exceeds the allowed limit of 
{sql_lab_payload_max_mb} MB.",
+                                
error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
+                                level=ErrorLevel.ERROR,
+                            )
+                        )
+
             cache_timeout = database.cache_timeout
             if cache_timeout is None:
                 cache_timeout = config["CACHE_DEFAULT_TIMEOUT"]
@@ -635,6 +654,23 @@ def execute_sql_statements(
                     "expanded_columns": expanded_columns,
                 }
             )
+        # Check the size of the serialized payload (opt-in logic for 
return_results)
+        if sql_lab_payload_max_mb := config.get("SQLLAB_PAYLOAD_MAX_MB"):
+            serialized_payload = _serialize_payload(
+                payload, cast(bool, results_backend_use_msgpack)
+            )
+            serialized_payload_size = sys.getsizeof(serialized_payload)
+            max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB
+
+            if serialized_payload_size > max_bytes:
+                logger.info("Result size exceeds the allowed limit.")
+                raise SupersetErrorException(
+                    SupersetError(
+                        message=f"Result size ({serialized_payload_size / 
BYTES_IN_MB:.2f} MB) exceeds the allowed limit of {sql_lab_payload_max_mb} MB.",
+                        error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
+                        level=ErrorLevel.ERROR,
+                    )
+                )
         return payload
 
     return None
diff --git a/tests/unit_tests/sql_lab_test.py b/tests/unit_tests/sql_lab_test.py
index 83093352bc..cc9b146f39 100644
--- a/tests/unit_tests/sql_lab_test.py
+++ b/tests/unit_tests/sql_lab_test.py
@@ -17,8 +17,10 @@
 # pylint: disable=import-outside-toplevel, invalid-name, unused-argument, 
too-many-locals
 
 import json
+from unittest import mock
 from uuid import UUID
 
+import pytest
 import sqlparse
 from freezegun import freeze_time
 from pytest_mock import MockerFixture
@@ -27,9 +29,9 @@ from sqlalchemy.orm.session import Session
 from superset import db
 from superset.common.db_query_status import QueryStatus
 from superset.errors import ErrorLevel, SupersetErrorType
-from superset.exceptions import OAuth2Error
+from superset.exceptions import OAuth2Error, SupersetErrorException
 from superset.models.core import Database
-from superset.sql_lab import get_sql_results
+from superset.sql_lab import execute_sql_statements, get_sql_results
 from superset.utils.core import override_user
 from tests.unit_tests.models.core_test import oauth2_client_info
 
@@ -125,6 +127,115 @@ def test_execute_sql_statement_with_rls(
     SupersetResultSet.assert_called_with([(42,)], cursor.description, 
db_engine_spec)
 
 
[email protected](
+    "superset.sql_lab.config",
+    {"SQLLAB_PAYLOAD_MAX_MB": 50},  # Set the desired config value for testing
+)
+def test_execute_sql_statement_exceeds_payload_limit(mocker: MockerFixture) -> 
None:
+    """
+    Test for `execute_sql_statements` when the result payload size exceeds the 
limit.
+    """
+
+    # Mock the query object and database
+    query = mocker.MagicMock()
+    query.limit = 1
+    query.database = mocker.MagicMock()
+    query.database.db_engine_spec.is_select_query.return_value = True
+    query.database.cache_timeout = 100
+    query.status = "RUNNING"
+    query.select_as_cta = False
+    query.database.allow_run_async = True
+
+    # Mock get_query to return our mocked query object
+    mocker.patch("superset.sql_lab.get_query", return_value=query)
+
+    # Mock sys.getsizeof to simulate a large payload size
+    mocker.patch("sys.getsizeof", return_value=100000000)  # 100 MB
+
+    # Mock _serialize_payload
+    def mock_serialize_payload(payload, use_msgpack):
+        return "serialized_payload"
+
+    mocker.patch(
+        "superset.sql_lab._serialize_payload", 
side_effect=mock_serialize_payload
+    )
+
+    # Mock db.session.refresh to avoid AttributeError during session refresh
+    mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)
+
+    # Mock the results backend to avoid "Results backend is not configured" 
error
+    mocker.patch("superset.sql_lab.results_backend", return_value=True)
+
+    # Test that the exception is raised when the payload exceeds the limit
+    with pytest.raises(SupersetErrorException):
+        execute_sql_statements(
+            query_id=1,
+            rendered_query="SELECT 42 AS answer",
+            return_results=True,  # Simulate that results are being returned
+            store_results=True,  # Not storing results but returning them
+            start_time=None,
+            expand_data=False,
+            log_params={},
+        )
+
+
[email protected](
+    "superset.sql_lab.config",
+    {"SQLLAB_PAYLOAD_MAX_MB": 50},  # Set the desired config value for testing
+)
+def test_execute_sql_statement_within_payload_limit(mocker: MockerFixture) -> 
None:
+    """
+    Test for `execute_sql_statements` when the result payload size is within 
the limit,
+    and check if the flow executes smoothly without raising any exceptions.
+    """
+
+    # Mock the query object and database
+    query = mocker.MagicMock()
+    query.limit = 1
+    query.database = mocker.MagicMock()
+    query.database.db_engine_spec.is_select_query.return_value = True
+    query.database.cache_timeout = 100
+    query.status = "RUNNING"
+    query.select_as_cta = False
+    query.database.allow_run_async = True
+
+    # Mock get_query to return our mocked query object
+    mocker.patch("superset.sql_lab.get_query", return_value=query)
+
+    # Mock sys.getsizeof to simulate a payload size that is within the limit
+    mocker.patch("sys.getsizeof", return_value=10000000)  # 10 MB (within 
limit)
+
+    # Mock _serialize_payload
+    def mock_serialize_payload(payload, use_msgpack):
+        return "serialized_payload"
+
+    mocker.patch(
+        "superset.sql_lab._serialize_payload", 
side_effect=mock_serialize_payload
+    )
+
+    # Mock db.session.refresh to avoid AttributeError during session refresh
+    mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)
+
+    # Mock the results backend to avoid "Results backend is not configured" 
error
+    mocker.patch("superset.sql_lab.results_backend", return_value=True)
+
+    # Test that no exception is raised and the function executes smoothly
+    try:
+        execute_sql_statements(
+            query_id=1,
+            rendered_query="SELECT 42 AS answer",
+            return_results=True,  # Simulate that results are being returned
+            store_results=True,  # Not storing results but returning them
+            start_time=None,
+            expand_data=False,
+            log_params={},
+        )
+    except SupersetErrorException:
+        pytest.fail(
+            "SupersetErrorException should not have been raised for payload 
within the limit"
+        )
+
+
 def test_sql_lab_insert_rls_as_subquery(
     mocker: MockerFixture,
     session: Session,

Reply via email to