This is an automated email from the ASF dual-hosted git repository. arivero pushed a commit to branch table-time-comparison in repository https://gitbox.apache.org/repos/asf/superset.git
commit 0d4bad4979a73199b37074ff36521dc38b84bf05 Author: Antonio Rivero <[email protected]> AuthorDate: Mon Mar 4 23:07:00 2024 +0100 Table with Time Comparison: - Add comments to help navigate the new JOIN query operation ofr time_comparison --- superset/connectors/sqla/models.py | 49 ++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 6259628d5b..a872f75553 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -1438,12 +1438,25 @@ class SqlaTable( mutate: bool, instant_time_comparison_info: dict[str, Any], ) -> tuple[str, list[str]]: + """ + Main goal of this method is to create a JOIN between a given query object and + other that shifts the time filters. This is different from time_offsets because + we are not joining result sets but rather we're applying the JOIN at query level. + Use case: Compare paginated data in a Table Chart. But ideally can be leveraged by + anything that needs the experimental instant time comparison. + """ + # So we don't override the original QueryObject query_obj_clone = copy.copy(query_obj) final_query_sql = "" + # The inner query object doesn't need limits or offset query_obj_clone["row_limit"] = None query_obj_clone["row_offset"] = None + # Let's get what range should we be using when building the time_comparison shift + # This is computing the time_shift based on some predefined options of deltas instant_time_comparison_range = instant_time_comparison_info.get("range") if instant_time_comparison_range == InstantTimeComparison.CUSTOM: + # If it's a custom filter, we take the 1st temporal filter and change it with + # whatever value we received in the request as the custom filter. custom_filter = instant_time_comparison_info.get("filter", {}) temporal_filters = [ filter["col"] @@ -1462,23 +1475,28 @@ class SqlaTable( new_filters = temporal_filters + non_temporal_filters query_obj_clone["filter"] = new_filters if instant_time_comparison_range != InstantTimeComparison.CUSTOM: + # When not custom, we're supposed to use the predefined time ranges + # Year, Month, Week or Inherited query_obj_clone["extras"] = { **query_obj_clone.get("extras", {}), "instant_time_comparison_range": instant_time_comparison_range, } - sqlaq_2 = self.get_sqla_query(**query_obj_clone) + shifted_sqlaq = self.get_sqla_query(**query_obj_clone) + # We JOIN only over columns, not metrics or anything else since those cannot be + # joined join_columns = query_obj_clone.get("columns") or [] - sqla_query_a = sqlaq.sqla_query - sqla_query_b = sqlaq_2.sqla_query - sqla_query_b_subquery = sqla_query_b.subquery() - query_a_cte = sqla_query_a.cte("query_a_results") - column_names_a = [column.key for column in sqla_query_a.c] + original_query_a = sqlaq.sqla_query + shifted_query_b = shifted_sqlaq.sqla_query + shifted_query_b_subquery = shifted_query_b.subquery() + query_a_cte = original_query_a.cte("query_a_results") + column_names_a = [column.key for column in original_query_a.c] exclude_columns_b = set(query_obj_clone.get("columns") or []) + # Let's prepare the columns set to be used in query A and B selected_columns_a = [query_a_cte.c[col].label(col) for col in column_names_a] # Renamed columns from Query B (with "prev_" prefix) selected_columns_b = [ - sqla_query_b_subquery.c[col].label(f"prev_{col}") - for col in sqla_query_b_subquery.c.keys() + shifted_query_b_subquery.c[col].label(f"prev_{col}") + for col in shifted_query_b_subquery.c.keys() if col not in exclude_columns_b ] # Combine selected columns from both queries @@ -1486,25 +1504,30 @@ class SqlaTable( if join_columns and not query_obj_clone.get("is_rowcount"): # Proceed with JOIN operation as before since join_columns is not empty join_conditions = [ - sqla_query_b_subquery.c[col] == query_a_cte.c[col] + shifted_query_b_subquery.c[col] == query_a_cte.c[col] for col in join_columns - if col in sqla_query_b_subquery.c and col in query_a_cte.c + if col in shifted_query_b_subquery.c and col in query_a_cte.c ] final_query = sa.select(*final_selected_columns).select_from( - sqla_query_b_subquery.join(query_a_cte, sa.and_(*join_conditions)) + shifted_query_b_subquery.join(query_a_cte, sa.and_(*join_conditions)) ) else: + # When dealing with queries that have no columns or that are totals, + # rowcounts etc we join with the 1 = 1 to create a result set that have + # both sets (original and prev) final_query = sa.select(*final_selected_columns).select_from( - sqla_query_b_subquery.join( + shifted_query_b_subquery.join( query_a_cte, sa.literal(True) == sa.literal(True) ) ) + # Transform the query as you would within get_query_str_extended final_query_sql = self.database.compile_sqla_query(final_query) final_query_sql = self._apply_cte(final_query_sql, sqlaq.cte) final_query_sql = sqlparse.format(final_query_sql, reindent=True) if mutate: final_query_sql = self.mutate_query_from_config(final_query_sql) + # Prepare the labels for the columns to be used labels_expected = self.extract_column_names(final_selected_columns) return final_query_sql, labels_expected @@ -1520,6 +1543,8 @@ class SqlaTable( sql = self._apply_cte(sql, sqlaq.cte) sql = sqlparse.format(sql, reindent=True) + # Need to tell apart the regular queries from the ones that need + # Time comparison query_obj_clone = copy.copy(query_obj) query_object_extras: dict[str, Any] = query_obj.get("extras", {}) instant_time_comparison_info = query_object_extras.get(
