Copilot commented on code in PR #36057:
URL: https://github.com/apache/superset/pull/36057#discussion_r2519362810


##########
superset/models/helpers.py:
##########
@@ -1140,6 +1180,755 @@ def assign_column_label(df: pd.DataFrame) -> Optional[pd.DataFrame]:
             error_message=error_message,
         )
 
+    def exc_query(self, qry: Any) -> QueryResult:
+        """
+        Deprecated: Use query() instead.
+        This method is kept for backward compatibility.
+        """
+        return self.query(qry)
+
+    def normalize_df(self, df: pd.DataFrame, query_object: QueryObject) -> pd.DataFrame:
+        """
+        Normalize the dataframe by converting datetime columns and ensuring
+        numerical metrics.
+
+        :param df: The dataframe to normalize
+        :param query_object: The query object with metadata about columns
+        :return: Normalized dataframe
+        """
+
+        def _get_timestamp_format(column: str | None) -> str | None:
+            if not hasattr(self, "get_column"):
+                return None
+            column_obj = self.get_column(column)
+            if (
+                column_obj
+                and hasattr(column_obj, "python_date_format")
+                and (formatter := column_obj.python_date_format)
+            ):
+                return str(formatter)
+            return None
+
+        # Collect datetime columns
+        labels = tuple(
+            label
+            for label in [
+                *get_base_axis_labels(query_object.columns),
+                query_object.granularity,
+            ]
+            if hasattr(self, "get_column")
+            and (col := self.get_column(label))
+            and (col.get("is_dttm") if isinstance(col, dict) else col.is_dttm)
+        )
+
+        dttm_cols = [
+            DateColumn(
+                timestamp_format=_get_timestamp_format(label),
+                offset=self.offset,
+                time_shift=query_object.time_shift,
+                col_label=label,
+            )
+            for label in labels
+            if label
+        ]
+
+        if DTTM_ALIAS in df:
+            dttm_cols.append(
+                DateColumn.get_legacy_time_column(
+                    timestamp_format=_get_timestamp_format(query_object.granularity),
+                    offset=self.offset,
+                    time_shift=query_object.time_shift,
+                )
+            )
+
+        normalize_dttm_col(
+            df=df,
+            dttm_cols=tuple(dttm_cols),
+        )
+
+        # Convert metrics to numerical values if enforced
+        if getattr(self, "enforce_numerical_metrics", True):
+            dataframe_utils.df_metrics_to_num(df, query_object)
+
+        df.replace([np.inf, -np.inf], np.nan, inplace=True)
+
+        return df
+
+    def get_query_result(self, query_object: QueryObject) -> QueryResult:
+        """
+        Execute query and return results with full processing pipeline.
+
+        This method handles:
+        1. Query execution via self.query()
+        2. DataFrame normalization
+        3. Time offset processing (if applicable)
+        4. Post-processing operations
+
+        :param query_object: The query configuration
+        :return: QueryResult with processed dataframe
+        """
+        # Execute the base query
+        result = self.query(query_object.to_dict())
+        query = result.query + ";\n\n" if result.query else ""
+
+        # Process the dataframe if not empty
+        df = result.df
+        if not df.empty:
+            # Normalize datetime columns and metrics
+            df = self.normalize_df(df, query_object)
+
+            # Process time offsets if requested
+            if query_object.time_offsets:
+                # Process time offsets using the datasource's own method
+                # Note: caching is disabled here as we don't have query context
+                time_offsets = self.processing_time_offsets(
+                    df, query_object, cache_key_fn=None, cache_timeout_fn=None
+                )
+                df = time_offsets["df"]
+                queries = time_offsets["queries"]
+                query += ";\n\n".join(queries)
+                query += ";\n\n"
+
+            # Execute post-processing operations
+            try:
+                df = query_object.exec_post_processing(df)
+            except InvalidPostProcessingError as ex:
+                raise QueryObjectValidationError(ex.message) from ex
+
+        # Update result with processed data
+        result.df = df
+        result.query = query
+        result.from_dttm = query_object.from_dttm
+        result.to_dttm = query_object.to_dttm
+
+        return result
+
+    def processing_time_offsets(  # pylint: disable=too-many-locals,too-many-statements # noqa: C901
+        self,
+        df: pd.DataFrame,
+        query_object: QueryObject,
+        cache_key_fn: Callable[[QueryObject, str, Any], str | None] | None = None,
+        cache_timeout_fn: Callable[[], int] | None = None,
+        force_cache: bool = False,
+    ) -> CachedTimeOffset:
+        """
+        Process time offsets for time comparison feature.
+
+        This method handles both relative time offsets (e.g., "1 week ago") and
+        absolute date range offsets (e.g., "2015-01-03 : 2015-01-04").
+
+        :param df: The main dataframe
+        :param query_object: The query object with time offset configuration
+        :param cache_key_fn: Optional function to generate cache keys
+        :param cache_timeout_fn: Optional function to get cache timeout
+        :param force_cache: Whether to force cache refresh
+        :return: CachedTimeOffset with processed dataframe and queries
+        """
+        # Import here to avoid circular dependency
+        # pylint: disable=import-outside-toplevel
+        from superset.common.utils.query_cache_manager import QueryCacheManager
+
+        # ensure query_object is immutable
+        query_object_clone = copy.copy(query_object)
+        queries: list[str] = []
+        cache_keys: list[str | None] = []
+        offset_dfs: dict[str, pd.DataFrame] = {}
+
+        outer_from_dttm, outer_to_dttm = get_since_until_from_query_object(query_object)
+        if not outer_from_dttm or not outer_to_dttm:
+            raise QueryObjectValidationError(
+                _(
+                    "An enclosed time range (both start and end) must be 
specified "
+                    "when using a Time Comparison."
+                )
+            )
+
+        time_grain = self.get_time_grain(query_object)
+        metric_names = get_metric_names(query_object.metrics)
+        # use columns that are not metrics as join keys
+        join_keys = [col for col in df.columns if col not in metric_names]
+
+        for offset in query_object.time_offsets:
+            try:
+                original_offset = offset
+                is_date_range_offset = self.is_valid_date_range(offset)
+
+                if is_date_range_offset and feature_flag_manager.is_feature_enabled(
+                    "DATE_RANGE_TIMESHIFTS_ENABLED"
+                ):
+                    # DATE RANGE OFFSET LOGIC (like "2015-01-03 : 2015-01-04")
+                    try:
+                        # Parse the specified range
+                        offset_from_dttm, offset_to_dttm = (
+                            get_since_until_from_time_range(time_range=offset)
+                        )
+                    except ValueError as ex:
+                        raise QueryObjectValidationError(str(ex)) from ex
+
+                    # Use the specified range directly
+                    query_object_clone.from_dttm = offset_from_dttm
+                    query_object_clone.to_dttm = offset_to_dttm
+
+                    # For date range offsets, we must NOT set inner bounds
+                    # These create additional WHERE clauses that conflict with our
+                    # date range
+                    query_object_clone.inner_from_dttm = None
+                    query_object_clone.inner_to_dttm = None
+
+                elif is_date_range_offset:
+                    # Date range timeshift feature is disabled
+                    raise QueryObjectValidationError(
+                        "Date range timeshifts are not enabled. "
+                        "Please contact your administrator to enable the "
+                        "DATE_RANGE_TIMESHIFTS_ENABLED feature flag."
+                    )
+
+                else:
+                    # RELATIVE OFFSET LOGIC (like "1 day ago")
+                    if self.is_valid_date(offset) or offset == "inherit":
+                        offset = self.get_offset_custom_or_inherit(
+                            offset,
+                            outer_from_dttm,
+                            outer_to_dttm,
+                        )
+                    query_object_clone.from_dttm = get_past_or_future(
+                        offset,
+                        outer_from_dttm,
+                    )
+                    query_object_clone.to_dttm = get_past_or_future(
+                        offset, outer_to_dttm
+                    )
+
+                    query_object_clone.inner_from_dttm = query_object_clone.from_dttm
+                    query_object_clone.inner_to_dttm = query_object_clone.to_dttm
+
+                x_axis_label = get_x_axis_label(query_object.columns)
+                query_object_clone.granularity = (
+                    query_object_clone.granularity or x_axis_label
+                )
+
+            except ValueError as ex:
+                raise QueryObjectValidationError(str(ex)) from ex
+
+            query_object_clone.time_offsets = []
+            query_object_clone.post_processing = []
+
+            # Get time offset index
+            index = (get_base_axis_labels(query_object.columns) or [DTTM_ALIAS])[0]
+
+            if is_date_range_offset and feature_flag_manager.is_feature_enabled(
+                "DATE_RANGE_TIMESHIFTS_ENABLED"
+            ):
+                # Create a completely new filter list to preserve original filters
+                query_object_clone.filter = copy.deepcopy(query_object_clone.filter)
+
+                # Remove any existing temporal filters that might conflict
+                query_object_clone.filter = [
+                    flt
+                    for flt in query_object_clone.filter
+                    if not (flt.get("op") == FilterOperator.TEMPORAL_RANGE)
+                ]
+
+                # Determine the temporal column with multiple fallback strategies
+                temporal_col = self._get_temporal_column_for_filter(
+                    query_object_clone, x_axis_label
+                )
+
+                # Always add a temporal filter for date range offsets
+                if temporal_col:
+                    new_temporal_filter: QueryObjectFilterClause = {
+                        "col": temporal_col,
+                        "op": FilterOperator.TEMPORAL_RANGE,
+                        "val": (
+                            f"{query_object_clone.from_dttm} : "
+                            f"{query_object_clone.to_dttm}"
+                        ),
+                    }
+                    query_object_clone.filter.append(new_temporal_filter)
+
+                else:
+                    # This should rarely happen with proper fallbacks
+                    raise QueryObjectValidationError(
+                        _(
+                            "Unable to identify temporal column for date range 
time comparison."  # noqa: E501

Review Comment:
   Missing space between error message strings. Python implicitly concatenates adjacent string literals, so these two will be joined with no space between "comparison." and "Please", producing "...comparison.Please ensure...". Add a space at the end of the first string or at the beginning of the second.
   ```suggestion
                               "Unable to identify temporal column for date 
range time comparison. "  # noqa: E501
   ```
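   
   For illustration, here is a minimal standalone sketch (not part of the PR; the second literal's exact wording is hypothetical, since the hunk above is truncated before it) showing how Python's implicit concatenation of adjacent string literals drops the space:
   
   ```python
   # Adjacent string literals are fused at compile time with no separator.
   broken = (
       "Unable to identify temporal column for date range time comparison."
       "Please ensure the dataset has a temporal column."  # hypothetical text
   )
   print(broken)  # "...comparison.Please ensure..." -- words run together
   
   # A trailing space at the end of the first literal restores the gap.
   fixed = (
       "Unable to identify temporal column for date range time comparison. "
       "Please ensure the dataset has a temporal column."  # hypothetical text
   )
   print(fixed)   # "...comparison. Please ensure..."
   ```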



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

