This is an automated email from the ASF dual-hosted git repository.
arivero pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 6844735a45 fix(time_offset): improved LIMIT-handling in advanced
analytics (#27934)
6844735a45 is described below
commit 6844735a4513fb747780b346441f8da5107d0fe5
Author: Antonio Rivero <[email protected]>
AuthorDate: Fri Apr 12 00:54:21 2024 +0200
fix(time_offset): improved LIMIT-handling in advanced analytics (#27934)
---
superset/common/query_context_processor.py | 7 +++
tests/integration_tests/query_context_tests.py | 67 +++++++++++++++++++++++++-
2 files changed, 73 insertions(+), 1 deletion(-)
diff --git a/superset/common/query_context_processor.py
b/superset/common/query_context_processor.py
index f003f8ed30..55c80386a3 100644
--- a/superset/common/query_context_processor.py
+++ b/superset/common/query_context_processor.py
@@ -455,6 +455,13 @@ class QueryContextProcessor:
for metric in metric_names
}
+ # When the original query has limit or offset we wont apply those
+ # to the subquery so we prevent data inconsistency due to missing
records
+ # in the dataframes when performing the join
+ if query_object.row_limit or query_object.row_offset:
+ query_object_clone_dct["row_limit"] = config["ROW_LIMIT"]
+ query_object_clone_dct["row_offset"] = 0
+
if isinstance(self._qc_datasource, Query):
result = self._qc_datasource.exc_query(query_object_clone_dct)
else:
diff --git a/tests/integration_tests/query_context_tests.py
b/tests/integration_tests/query_context_tests.py
index dc400de5ef..d36b6c85c1 100644
--- a/tests/integration_tests/query_context_tests.py
+++ b/tests/integration_tests/query_context_tests.py
@@ -17,13 +17,14 @@
import re
import time
from typing import Any
+from unittest.mock import Mock, patch
import numpy as np
import pandas as pd
import pytest
from pandas import DateOffset
-from superset import db
+from superset import app, db
from superset.charts.schemas import ChartDataQueryContextSchema
from superset.common.chart_data import ChartDataResultFormat,
ChartDataResultType
from superset.common.query_context import QueryContext
@@ -674,6 +675,70 @@ class TestQueryContext(SupersetTestCase):
== df_3_years_later.loc[index]["sum__num"]
)
+ @pytest.mark.usefixtures("load_birth_names_dashboard_with_slices")
+ @patch("superset.common.query_context.QueryContext.get_query_result")
+ def test_time_offsets_in_query_object_no_limit(self, query_result_mock):
+ """
+ Ensure that time_offsets can generate the correct queries and
+ it doesnt use the row_limit nor row_offset from the original
+ query object
+ """
+ payload = get_query_context("birth_names")
+ payload["queries"][0]["columns"] = [
+ {
+ "timeGrain": "P1D",
+ "columnType": "BASE_AXIS",
+ "sqlExpression": "ds",
+ "label": "ds",
+ "expressionType": "SQL",
+ }
+ ]
+ payload["queries"][0]["metrics"] = ["sum__num"]
+ payload["queries"][0]["groupby"] = ["name"]
+ payload["queries"][0]["is_timeseries"] = True
+ payload["queries"][0]["row_limit"] = 100
+ payload["queries"][0]["row_offset"] = 10
+ payload["queries"][0]["time_range"] = "1990 : 1991"
+
+ initial_data = {
+ "__timestamp": ["1990-01-01", "1990-01-01"],
+ "name": ["zban", "ahwb"],
+ "sum__num": [43571, 27225],
+ }
+ initial_df = pd.DataFrame(initial_data)
+
+ mock_query_result = Mock()
+ mock_query_result.df = initial_df
+ side_effects = [mock_query_result]
+ query_result_mock.side_effect = side_effects
+ # Proceed with the test as before
+ query_context = ChartDataQueryContextSchema().load(payload)
+ query_object = query_context.queries[0]
+ # First call to get_query_result, should return initial_df
+ query_result = query_context.get_query_result(query_object)
+ df = query_result.df
+ # Setup the payload for time offsets
+ payload["queries"][0]["time_offsets"] = ["1 year ago", "1 year later"]
+ query_context = ChartDataQueryContextSchema().load(payload)
+ query_object = query_context.queries[0]
+ time_offsets_obj = query_context.processing_time_offsets(df,
query_object)
+ sqls = time_offsets_obj["queries"]
+ row_limit_value = app.config["ROW_LIMIT"]
+ row_limit_pattern_with_config_value = r"LIMIT " + re.escape(
+ str(row_limit_value)
+ )
+ self.assertEqual(len(sqls), 2)
+ # 1 year ago
+ assert re.search(r"1989-01-01.+1990-01-01", sqls[0], re.S)
+ assert not re.search(r"LIMIT 100", sqls[0], re.S)
+ assert not re.search(r"OFFSET 10", sqls[0], re.S)
+ assert re.search(row_limit_pattern_with_config_value, sqls[0], re.S)
+ # 1 year later
+ assert re.search(r"1991-01-01.+1992-01-01", sqls[1], re.S)
+ assert not re.search(r"LIMIT 100", sqls[1], re.S)
+ assert not re.search(r"OFFSET 10", sqls[1], re.S)
+ assert re.search(row_limit_pattern_with_config_value, sqls[1], re.S)
+
def test_get_label_map(app_context, virtual_dataset_comma_in_column_value):
qc = QueryContextFactory().create(