This is an automated email from the ASF dual-hosted git repository.
villebro pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 6ede3271ff fix(SQL Lab): hang when result set size is too big (#30522)
6ede3271ff is described below
commit 6ede3271ff3e4c82a53a08e0dd18b35e01c4fa4d
Author: anamitraadhikari <[email protected]>
AuthorDate: Mon Oct 14 18:03:28 2024 -0700
fix(SQL Lab): hang when result set size is too big (#30522)
Co-authored-by: aadhikari <[email protected]>
Co-authored-by: Ville Brofeldt <[email protected]>
---
docs/static/resources/openapi.json | 3 +-
.../superset-ui-core/src/query/types/Query.ts | 1 +
superset-frontend/src/setup/setupErrorMessages.ts | 4 +
superset/config.py | 3 +
superset/errors.py | 3 +
superset/sql_lab.py | 36 +++++++
tests/unit_tests/sql_lab_test.py | 115 ++++++++++++++++++++-
7 files changed, 162 insertions(+), 3 deletions(-)
diff --git a/docs/static/resources/openapi.json
b/docs/static/resources/openapi.json
index a039bd3a2a..43781ac965 100644
--- a/docs/static/resources/openapi.json
+++ b/docs/static/resources/openapi.json
@@ -116,7 +116,8 @@
"GENERIC_BACKEND_ERROR",
"INVALID_PAYLOAD_FORMAT_ERROR",
"INVALID_PAYLOAD_SCHEMA_ERROR",
- "REPORT_NOTIFICATION_ERROR"
+ "REPORT_NOTIFICATION_ERROR",
+ "RESULT_TOO_LARGE_ERROR"
],
"type": "string"
},
diff --git
a/superset-frontend/packages/superset-ui-core/src/query/types/Query.ts
b/superset-frontend/packages/superset-ui-core/src/query/types/Query.ts
index 4feba17319..a83d17a937 100644
--- a/superset-frontend/packages/superset-ui-core/src/query/types/Query.ts
+++ b/superset-frontend/packages/superset-ui-core/src/query/types/Query.ts
@@ -227,6 +227,7 @@ export const ErrorTypeEnum = {
ASYNC_WORKERS_ERROR: 'ASYNC_WORKERS_ERROR',
ADHOC_SUBQUERY_NOT_ALLOWED_ERROR: 'ADHOC_SUBQUERY_NOT_ALLOWED_ERROR',
INVALID_SQL_ERROR: 'INVALID_SQL_ERROR',
+ RESULT_TOO_LARGE_ERROR: 'RESULT_TOO_LARGE_ERROR',
// Generic errors
GENERIC_COMMAND_ERROR: 'GENERIC_COMMAND_ERROR',
diff --git a/superset-frontend/src/setup/setupErrorMessages.ts
b/superset-frontend/src/setup/setupErrorMessages.ts
index 442cd32152..e4c2380150 100644
--- a/superset-frontend/src/setup/setupErrorMessages.ts
+++ b/superset-frontend/src/setup/setupErrorMessages.ts
@@ -159,5 +159,9 @@ export default function setupErrorMessages() {
ErrorTypeEnum.INVALID_SQL_ERROR,
InvalidSQLErrorMessage,
);
+ errorMessageComponentRegistry.registerValue(
+ ErrorTypeEnum.RESULT_TOO_LARGE_ERROR,
+ DatabaseErrorMessage,
+ );
setupErrorMessagesExtra();
}
diff --git a/superset/config.py b/superset/config.py
index 7ae46bba76..d19e30a5a5 100644
--- a/superset/config.py
+++ b/superset/config.py
@@ -960,6 +960,9 @@ SUPERSET_META_DB_LIMIT: int | None = 1000
SQLLAB_SAVE_WARNING_MESSAGE = None
SQLLAB_SCHEDULE_WARNING_MESSAGE = None
+# Max payload size (MB) for SQL Lab to prevent browser hangs with large
results.
+SQLLAB_PAYLOAD_MAX_MB = None
+
# Force refresh while auto-refresh in dashboard
DASHBOARD_AUTO_REFRESH_MODE: Literal["fetch", "force"] = "force"
# Dashboard auto refresh intervals
diff --git a/superset/errors.py b/superset/errors.py
index 9f0f5bed05..4d9c08d142 100644
--- a/superset/errors.py
+++ b/superset/errors.py
@@ -87,6 +87,7 @@ class SupersetErrorType(StrEnum):
ASYNC_WORKERS_ERROR = "ASYNC_WORKERS_ERROR"
ADHOC_SUBQUERY_NOT_ALLOWED_ERROR = "ADHOC_SUBQUERY_NOT_ALLOWED_ERROR"
INVALID_SQL_ERROR = "INVALID_SQL_ERROR"
+ RESULT_TOO_LARGE_ERROR = "RESULT_TOO_LARGE_ERROR"
# Generic errors
GENERIC_COMMAND_ERROR = "GENERIC_COMMAND_ERROR"
@@ -151,6 +152,7 @@ ISSUE_CODES = {
1036: _("The database was deleted."),
1037: _("Custom SQL fields cannot contain sub-queries."),
1040: _("The submitted payload failed validation."),
+ 1041: _("The result size exceeds the allowed limit."),
}
@@ -190,6 +192,7 @@ ERROR_TYPES_TO_ISSUE_CODES_MAPPING = {
SupersetErrorType.DATABASE_NOT_FOUND_ERROR: [1011, 1036],
SupersetErrorType.CONNECTION_DATABASE_TIMEOUT: [1001, 1009],
SupersetErrorType.MARSHMALLOW_ERROR: [1040],
+ SupersetErrorType.RESULT_TOO_LARGE_ERROR: [1041],
}
diff --git a/superset/sql_lab.py b/superset/sql_lab.py
index 7685b4419c..88e1bc1aad 100644
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -17,6 +17,7 @@
# pylint: disable=consider-using-transaction
import dataclasses
import logging
+import sys
import uuid
from contextlib import closing
from datetime import datetime
@@ -78,6 +79,7 @@ SQL_MAX_ROW = config["SQL_MAX_ROW"]
SQLLAB_CTAS_NO_LIMIT = config["SQLLAB_CTAS_NO_LIMIT"]
log_query = config["QUERY_LOGGER"]
logger = logging.getLogger(__name__)
+BYTES_IN_MB = 1024 * 1024
class SqlLabException(Exception):
@@ -531,6 +533,7 @@ def execute_sql_statements(
log_params,
apply_ctas,
)
+
except SqlLabQueryStoppedException:
payload.update({"status": QueryStatus.STOPPED})
return payload
@@ -601,6 +604,22 @@ def execute_sql_statements(
serialized_payload = _serialize_payload(
payload, cast(bool, results_backend_use_msgpack)
)
+
+ # Check the size of the serialized payload
+ if sql_lab_payload_max_mb :=
config.get("SQLLAB_PAYLOAD_MAX_MB"):
+ serialized_payload_size = sys.getsizeof(serialized_payload)
+ max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB
+
+ if serialized_payload_size > max_bytes:
+ logger.info("Result size exceeds the allowed limit.")
+ raise SupersetErrorException(
+ SupersetError(
+ message=f"Result size
({serialized_payload_size / BYTES_IN_MB:.2f} MB) exceeds the allowed limit of
{sql_lab_payload_max_mb} MB.",
+
error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
+ level=ErrorLevel.ERROR,
+ )
+ )
+
cache_timeout = database.cache_timeout
if cache_timeout is None:
cache_timeout = config["CACHE_DEFAULT_TIMEOUT"]
@@ -635,6 +654,23 @@ def execute_sql_statements(
"expanded_columns": expanded_columns,
}
)
+ # Check the size of the serialized payload (opt-in logic for
return_results)
+ if sql_lab_payload_max_mb := config.get("SQLLAB_PAYLOAD_MAX_MB"):
+ serialized_payload = _serialize_payload(
+ payload, cast(bool, results_backend_use_msgpack)
+ )
+ serialized_payload_size = sys.getsizeof(serialized_payload)
+ max_bytes = sql_lab_payload_max_mb * BYTES_IN_MB
+
+ if serialized_payload_size > max_bytes:
+ logger.info("Result size exceeds the allowed limit.")
+ raise SupersetErrorException(
+ SupersetError(
+ message=f"Result size ({serialized_payload_size /
BYTES_IN_MB:.2f} MB) exceeds the allowed limit of {sql_lab_payload_max_mb} MB.",
+ error_type=SupersetErrorType.RESULT_TOO_LARGE_ERROR,
+ level=ErrorLevel.ERROR,
+ )
+ )
return payload
return None
diff --git a/tests/unit_tests/sql_lab_test.py b/tests/unit_tests/sql_lab_test.py
index 83093352bc..cc9b146f39 100644
--- a/tests/unit_tests/sql_lab_test.py
+++ b/tests/unit_tests/sql_lab_test.py
@@ -17,8 +17,10 @@
# pylint: disable=import-outside-toplevel, invalid-name, unused-argument,
too-many-locals
import json
+from unittest import mock
from uuid import UUID
+import pytest
import sqlparse
from freezegun import freeze_time
from pytest_mock import MockerFixture
@@ -27,9 +29,9 @@ from sqlalchemy.orm.session import Session
from superset import db
from superset.common.db_query_status import QueryStatus
from superset.errors import ErrorLevel, SupersetErrorType
-from superset.exceptions import OAuth2Error
+from superset.exceptions import OAuth2Error, SupersetErrorException
from superset.models.core import Database
-from superset.sql_lab import get_sql_results
+from superset.sql_lab import execute_sql_statements, get_sql_results
from superset.utils.core import override_user
from tests.unit_tests.models.core_test import oauth2_client_info
@@ -125,6 +127,115 @@ def test_execute_sql_statement_with_rls(
SupersetResultSet.assert_called_with([(42,)], cursor.description,
db_engine_spec)
[email protected](
+ "superset.sql_lab.config",
+ {"SQLLAB_PAYLOAD_MAX_MB": 50}, # Set the desired config value for testing
+)
+def test_execute_sql_statement_exceeds_payload_limit(mocker: MockerFixture) ->
None:
+ """
+ Test for `execute_sql_statements` when the result payload size exceeds the
limit.
+ """
+
+ # Mock the query object and database
+ query = mocker.MagicMock()
+ query.limit = 1
+ query.database = mocker.MagicMock()
+ query.database.db_engine_spec.is_select_query.return_value = True
+ query.database.cache_timeout = 100
+ query.status = "RUNNING"
+ query.select_as_cta = False
+ query.database.allow_run_async = True
+
+ # Mock get_query to return our mocked query object
+ mocker.patch("superset.sql_lab.get_query", return_value=query)
+
+ # Mock sys.getsizeof to simulate a large payload size
+ mocker.patch("sys.getsizeof", return_value=100000000) # 100 MB
+
+ # Mock _serialize_payload
+ def mock_serialize_payload(payload, use_msgpack):
+ return "serialized_payload"
+
+ mocker.patch(
+ "superset.sql_lab._serialize_payload",
side_effect=mock_serialize_payload
+ )
+
+ # Mock db.session.refresh to avoid AttributeError during session refresh
+ mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)
+
+ # Mock the results backend to avoid "Results backend is not configured"
error
+ mocker.patch("superset.sql_lab.results_backend", return_value=True)
+
+ # Test that the exception is raised when the payload exceeds the limit
+ with pytest.raises(SupersetErrorException):
+ execute_sql_statements(
+ query_id=1,
+ rendered_query="SELECT 42 AS answer",
+ return_results=True, # Simulate that results are being returned
+ store_results=True, # Not storing results but returning them
+ start_time=None,
+ expand_data=False,
+ log_params={},
+ )
+
+
[email protected](
+ "superset.sql_lab.config",
+ {"SQLLAB_PAYLOAD_MAX_MB": 50}, # Set the desired config value for testing
+)
+def test_execute_sql_statement_within_payload_limit(mocker: MockerFixture) ->
None:
+ """
+ Test for `execute_sql_statements` when the result payload size is within
the limit,
+ and check if the flow executes smoothly without raising any exceptions.
+ """
+
+ # Mock the query object and database
+ query = mocker.MagicMock()
+ query.limit = 1
+ query.database = mocker.MagicMock()
+ query.database.db_engine_spec.is_select_query.return_value = True
+ query.database.cache_timeout = 100
+ query.status = "RUNNING"
+ query.select_as_cta = False
+ query.database.allow_run_async = True
+
+ # Mock get_query to return our mocked query object
+ mocker.patch("superset.sql_lab.get_query", return_value=query)
+
+ # Mock sys.getsizeof to simulate a payload size that is within the limit
+ mocker.patch("sys.getsizeof", return_value=10000000) # 10 MB (within
limit)
+
+ # Mock _serialize_payload
+ def mock_serialize_payload(payload, use_msgpack):
+ return "serialized_payload"
+
+ mocker.patch(
+ "superset.sql_lab._serialize_payload",
side_effect=mock_serialize_payload
+ )
+
+ # Mock db.session.refresh to avoid AttributeError during session refresh
+ mocker.patch("superset.sql_lab.db.session.refresh", return_value=None)
+
+ # Mock the results backend to avoid "Results backend is not configured"
error
+ mocker.patch("superset.sql_lab.results_backend", return_value=True)
+
+ # Test that no exception is raised and the function executes smoothly
+ try:
+ execute_sql_statements(
+ query_id=1,
+ rendered_query="SELECT 42 AS answer",
+ return_results=True, # Simulate that results are being returned
+ store_results=True, # Not storing results but returning them
+ start_time=None,
+ expand_data=False,
+ log_params={},
+ )
+ except SupersetErrorException:
+ pytest.fail(
+ "SupersetErrorException should not have been raised for payload
within the limit"
+ )
+
+
def test_sql_lab_insert_rls_as_subquery(
mocker: MockerFixture,
session: Session,