korbit-ai[bot] commented on code in PR #36057:
URL: https://github.com/apache/superset/pull/36057#discussion_r2512127864
##########
superset/common/query_context_processor.py:
##########
@@ -266,726 +220,14 @@ def query_cache_key(self, query_obj: QueryObject, **kwargs: Any) -> str | None:
return cache_key
def get_query_result(self, query_object: QueryObject) -> QueryResult:
- """Returns a pandas dataframe based on the query object"""
- query_context = self._query_context
- # Here, we assume that all the queries will use the same datasource, which is
- # a valid assumption for current setting. In the long term, we may
- # support multiple queries from different data sources.
-
- query = ""
- if isinstance(query_context.datasource, Query):
- # todo(hugh): add logic to manage all sip68 models here
- result = query_context.datasource.exc_query(query_object.to_dict())
- else:
- result = query_context.datasource.query(query_object.to_dict())
- query = result.query + ";\n\n"
-
- df = result.df
- # Transform the timestamp we received from database to pandas supported
- # datetime format. If no python_date_format is specified, the pattern will
- # be considered as the default ISO date format
- # If the datetime format is unix, the parse will use the corresponding
- # parsing logic
- if not df.empty:
- df = self.normalize_df(df, query_object)
-
- if query_object.time_offsets:
- time_offsets = self.processing_time_offsets(df, query_object)
- df = time_offsets["df"]
- queries = time_offsets["queries"]
-
- query += ";\n\n".join(queries)
- query += ";\n\n"
-
- # Re-raising QueryObjectValidationError
- try:
- df = query_object.exec_post_processing(df)
- except InvalidPostProcessingError as ex:
- raise QueryObjectValidationError(ex.message) from ex
-
- result.df = df
- result.query = query
- result.from_dttm = query_object.from_dttm
- result.to_dttm = query_object.to_dttm
- return result
-
- def normalize_df(self, df: pd.DataFrame, query_object: QueryObject) -> pd.DataFrame:
- # todo: should support "python_date_format" and "get_column" in each datasource
- def _get_timestamp_format(
- source: BaseDatasource, column: str | None
- ) -> str | None:
- column_obj = source.get_column(column)
- if (
- column_obj
- # only sqla column was supported
- and hasattr(column_obj, "python_date_format")
- and (formatter := column_obj.python_date_format)
- ):
- return str(formatter)
-
- return None
-
- datasource = self._qc_datasource
- labels = tuple(
- label
- for label in [
- *get_base_axis_labels(query_object.columns),
- query_object.granularity,
- ]
- if datasource
- # Query datasource didn't support `get_column`
- and hasattr(datasource, "get_column")
- and (col := datasource.get_column(label))
- # todo(hugh) standardize column object in Query datasource
- and (col.get("is_dttm") if isinstance(col, dict) else col.is_dttm)
- )
- dttm_cols = [
- DateColumn(
- timestamp_format=_get_timestamp_format(datasource, label),
- offset=datasource.offset,
- time_shift=query_object.time_shift,
- col_label=label,
- )
- for label in labels
- if label
- ]
- if DTTM_ALIAS in df:
- dttm_cols.append(
- DateColumn.get_legacy_time_column(
- timestamp_format=_get_timestamp_format(
- datasource, query_object.granularity
- ),
- offset=datasource.offset,
- time_shift=query_object.time_shift,
- )
- )
- normalize_dttm_col(
- df=df,
- dttm_cols=tuple(dttm_cols),
- )
-
- if self.enforce_numerical_metrics:
- dataframe_utils.df_metrics_to_num(df, query_object)
-
- df.replace([np.inf, -np.inf], np.nan, inplace=True)
-
- return df
-
- @staticmethod
- def get_time_grain(query_object: QueryObject) -> Any | None:
- if (
- query_object.columns
- and len(query_object.columns) > 0
- and isinstance(query_object.columns[0], dict)
- ):
- # If the time grain is in the columns it will be the first one
- # and it will be of AdhocColumn type
- return query_object.columns[0].get("timeGrain")
-
- return query_object.extras.get("time_grain_sqla")
-
- # pylint: disable=too-many-arguments
- def add_offset_join_column(
- self,
- df: pd.DataFrame,
- name: str,
- time_grain: str,
- time_offset: str | None = None,
- join_column_producer: Any = None,
- ) -> None:
"""
- Adds an offset join column to the provided DataFrame.
+ Returns a pandas dataframe based on the query object.
- The function modifies the DataFrame in-place.
-
- :param df: pandas DataFrame to which the offset join column will be added.
- :param name: The name of the new column to be added.
- :param time_grain: The time grain used to calculate the new column.
- :param time_offset: The time offset used to calculate the new column.
- :param join_column_producer: A function to generate the join column.
+ This method delegates to the datasource's get_query_result method,
+ which handles query execution, normalization, time offsets, and
+ post-processing.
"""
- if join_column_producer:
- df[name] = df.apply(lambda row: join_column_producer(row, 0), axis=1)
- else:
- df[name] = df.apply(
- lambda row: self.generate_join_column(row, 0, time_grain, time_offset),
- axis=1,
- )
-
- def is_valid_date(self, date_string: str) -> bool:
- try:
- # Attempt to parse the string as a date in the format YYYY-MM-DD
- datetime.strptime(date_string, "%Y-%m-%d")
- return True
- except ValueError:
- # If parsing fails, it's not a valid date in the format YYYY-MM-DD
- return False
-
- def is_valid_date_range(self, date_range: str) -> bool:
- try:
- # Attempt to parse the string as a date range in the format
- # YYYY-MM-DD:YYYY-MM-DD
- start_date, end_date = date_range.split(":")
- datetime.strptime(start_date.strip(), "%Y-%m-%d")
- datetime.strptime(end_date.strip(), "%Y-%m-%d")
- return True
- except ValueError:
- # If parsing fails, it's not a valid date range in the format
- # YYYY-MM-DD:YYYY-MM-DD
- return False
-
- def get_offset_custom_or_inherit(
- self,
- offset: str,
- outer_from_dttm: datetime,
- outer_to_dttm: datetime,
- ) -> str:
- """
- Get the time offset for custom or inherit.
-
- :param offset: The offset string.
- :param outer_from_dttm: The outer from datetime.
- :param outer_to_dttm: The outer to datetime.
- :returns: The time offset.
- """
- if offset == "inherit":
- # return the difference in days between the from and the to dttm formatted as a string with the " days ago" suffix # noqa: E501
- return f"{(outer_to_dttm - outer_from_dttm).days} days ago"
- if self.is_valid_date(offset):
- # return the offset as the difference in days between the outer from dttm and the offset date (which is a YYYY-MM-DD string) formatted as a string with the " days ago" suffix # noqa: E501
- offset_date = datetime.strptime(offset, "%Y-%m-%d")
- return f"{(outer_from_dttm - offset_date).days} days ago"
- return ""
-
- def processing_time_offsets( # pylint: disable=too-many-locals,too-many-statements # noqa: C901
- self,
- df: pd.DataFrame,
- query_object: QueryObject,
- ) -> CachedTimeOffset:
- """
- Process time offsets for time comparison feature.
-
- This method handles both relative time offsets (e.g., "1 week ago") and
- absolute date range offsets (e.g., "2015-01-03 : 2015-01-04").
- """
- query_context = self._query_context
- # ensure query_object is immutable
- query_object_clone = copy.copy(query_object)
- queries: list[str] = []
- cache_keys: list[str | None] = []
- offset_dfs: dict[str, pd.DataFrame] = {}
-
- outer_from_dttm, outer_to_dttm = get_since_until_from_query_object(query_object)
- if not outer_from_dttm or not outer_to_dttm:
- raise QueryObjectValidationError(
- _(
- "An enclosed time range (both start and end) must be
specified "
- "when using a Time Comparison."
- )
- )
-
- time_grain = self.get_time_grain(query_object)
- metric_names = get_metric_names(query_object.metrics)
- # use columns that are not metrics as join keys
- join_keys = [col for col in df.columns if col not in metric_names]
-
- for offset in query_object.time_offsets:
- try:
- original_offset = offset
- is_date_range_offset = self.is_valid_date_range(offset)
-
- if is_date_range_offset and feature_flag_manager.is_feature_enabled(
- "DATE_RANGE_TIMESHIFTS_ENABLED"
- ):
- # DATE RANGE OFFSET LOGIC (like "2015-01-03 : 2015-01-04")
- try:
- # Parse the specified range
- offset_from_dttm, offset_to_dttm = (
- get_since_until_from_time_range(time_range=offset)
- )
- except ValueError as ex:
- raise QueryObjectValidationError(str(ex)) from ex
-
- # Use the specified range directly
- query_object_clone.from_dttm = offset_from_dttm
- query_object_clone.to_dttm = offset_to_dttm
-
- # For date range offsets, we must NOT set inner bounds
- # These create additional WHERE clauses that conflict with our
- # date range
- query_object_clone.inner_from_dttm = None
- query_object_clone.inner_to_dttm = None
-
- elif is_date_range_offset:
- # Date range timeshift feature is disabled
- raise QueryObjectValidationError(
- "Date range timeshifts are not enabled. "
- "Please contact your administrator to enable the "
- "DATE_RANGE_TIMESHIFTS_ENABLED feature flag."
- )
-
- else:
- # RELATIVE OFFSET LOGIC (like "1 day ago")
- if self.is_valid_date(offset) or offset == "inherit":
- offset = self.get_offset_custom_or_inherit(
- offset,
- outer_from_dttm,
- outer_to_dttm,
- )
- query_object_clone.from_dttm = get_past_or_future(
- offset,
- outer_from_dttm,
- )
- query_object_clone.to_dttm = get_past_or_future(
- offset, outer_to_dttm
- )
-
- query_object_clone.inner_from_dttm = query_object_clone.from_dttm
- query_object_clone.inner_to_dttm = query_object_clone.to_dttm
-
- x_axis_label = get_x_axis_label(query_object.columns)
- query_object_clone.granularity = (
- query_object_clone.granularity or x_axis_label
- )
-
- except ValueError as ex:
- raise QueryObjectValidationError(str(ex)) from ex
-
- query_object_clone.time_offsets = []
- query_object_clone.post_processing = []
-
- # Get time offset index
- index = (get_base_axis_labels(query_object.columns) or [DTTM_ALIAS])[0]
-
- if is_date_range_offset and feature_flag_manager.is_feature_enabled(
- "DATE_RANGE_TIMESHIFTS_ENABLED"
- ):
- # Create a completely new filter list to preserve original filters
- query_object_clone.filter = copy.deepcopy(query_object_clone.filter)
-
- # Remove any existing temporal filters that might conflict
- query_object_clone.filter = [
- flt
- for flt in query_object_clone.filter
- if not (flt.get("op") == FilterOperator.TEMPORAL_RANGE)
- ]
-
- # Determine the temporal column with multiple fallback strategies
- temporal_col = self._get_temporal_column_for_filter(
- query_object_clone, x_axis_label
- )
-
- # Always add a temporal filter for date range offsets
- if temporal_col:
- new_temporal_filter: QueryObjectFilterClause = {
- "col": temporal_col,
- "op": FilterOperator.TEMPORAL_RANGE,
- "val": (
- f"{query_object_clone.from_dttm} : "
- f"{query_object_clone.to_dttm}"
- ),
- }
- query_object_clone.filter.append(new_temporal_filter)
-
- else:
- # This should rarely happen with proper fallbacks
- raise QueryObjectValidationError(
- _(
- "Unable to identify temporal column for date range
time comparison." # noqa: E501
- "Please ensure your dataset has a properly
configured time column." # noqa: E501
- )
- )
-
- else:
- # RELATIVE OFFSET: Original logic for non-date-range offsets
- # The comparison is not using a temporal column so we need to modify
- # the temporal filter so we run the query with the correct time range
- if not dataframe_utils.is_datetime_series(df.get(index)):
- query_object_clone.filter = copy.deepcopy(query_object_clone.filter)
-
- # Find and update temporal filters
- for flt in query_object_clone.filter:
- if flt.get(
- "op"
- ) == FilterOperator.TEMPORAL_RANGE and isinstance(
- flt.get("val"), str
- ):
- time_range = cast(str, flt.get("val"))
- (
- new_outer_from_dttm,
- new_outer_to_dttm,
- ) = get_since_until_from_time_range(
- time_range=time_range,
- time_shift=offset,
- )
- flt["val"] = f"{new_outer_from_dttm} :
{new_outer_to_dttm}"
- else:
- # If it IS a datetime series, we still need to clear conflicts
- query_object_clone.filter = copy.deepcopy(query_object_clone.filter)
-
- # For relative offsets with datetime series, ensure the temporal
- # filter matches our range
- temporal_col = query_object_clone.granularity or x_axis_label
-
- # Update any existing temporal filters to match our shifted range
- for flt in query_object_clone.filter:
- if (
- flt.get("op") == FilterOperator.TEMPORAL_RANGE
- and flt.get("col") == temporal_col
- ):
- flt["val"] = (
- f"{query_object_clone.from_dttm} : "
- f"{query_object_clone.to_dttm}"
- )
-
- # Remove non-temporal x-axis filters (but keep temporal ones)
- query_object_clone.filter = [
- flt
- for flt in query_object_clone.filter
- if not (
- flt.get("col") == x_axis_label
- and flt.get("op") != FilterOperator.TEMPORAL_RANGE
- )
- ]
-
- # Continue with the rest of the method (caching, execution, etc.)
- cached_time_offset_key = (
- offset if offset == original_offset else f"{offset}_{original_offset}"
- )
-
- cache_key = self.query_cache_key(
- query_object_clone,
- time_offset=cached_time_offset_key,
- time_grain=time_grain,
- )
- cache = QueryCacheManager.get(
- cache_key, CacheRegion.DATA, query_context.force
- )
-
- if cache.is_loaded:
- offset_dfs[offset] = cache.df
- queries.append(cache.query)
- cache_keys.append(cache_key)
- continue
-
- query_object_clone_dct = query_object_clone.to_dict()
-
- # rename metrics: SUM(value) => SUM(value) 1 year ago
- metrics_mapping = {
- metric: TIME_COMPARISON.join([metric, original_offset])
- for metric in metric_names
- }
-
- # When the original query has limit or offset we wont apply those
- # to the subquery so we prevent data inconsistency due to missing records
- # in the dataframes when performing the join
- if query_object.row_limit or query_object.row_offset:
- query_object_clone_dct["row_limit"] =
current_app.config["ROW_LIMIT"]
- query_object_clone_dct["row_offset"] = 0
-
- if isinstance(self._qc_datasource, Query):
- result = self._qc_datasource.exc_query(query_object_clone_dct)
- else:
- result = self._qc_datasource.query(query_object_clone_dct)
-
- queries.append(result.query)
- cache_keys.append(None)
-
- offset_metrics_df = result.df
- if offset_metrics_df.empty:
- offset_metrics_df = pd.DataFrame(
- {
- col: [np.NaN]
- for col in join_keys + list(metrics_mapping.values())
- }
- )
- else:
- # 1. normalize df, set dttm column
- offset_metrics_df = self.normalize_df(
- offset_metrics_df, query_object_clone
- )
-
- # 2. rename extra query columns
- offset_metrics_df = offset_metrics_df.rename(columns=metrics_mapping)
-
- # cache df and query
- value = {
- "df": offset_metrics_df,
- "query": result.query,
- }
- cache.set(
- key=cache_key,
- value=value,
- timeout=self.get_cache_timeout(),
- datasource_uid=query_context.datasource.uid,
- region=CacheRegion.DATA,
- )
- offset_dfs[offset] = offset_metrics_df
-
- if offset_dfs:
- df = self.join_offset_dfs(
- df,
- offset_dfs,
- time_grain,
- join_keys,
- )
-
- return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys)
-
- def _get_temporal_column_for_filter( # noqa: C901
- self, query_object: QueryObject, x_axis_label: str | None
- ) -> str | None:
- """
- Helper method to reliably determine the temporal column for filtering.
-
- This method tries multiple strategies to find the correct temporal column:
- 1. Use explicitly set granularity
- 2. Use x_axis_label if it's a temporal column
- 3. Find any datetime column in the datasource
-
- :param query_object: The query object
- :param x_axis_label: The x-axis label from the query
- :return: The name of the temporal column, or None if not found
- """
- # Strategy 1: Use explicitly set granularity
- if query_object.granularity:
- return query_object.granularity
-
- # Strategy 2: Use x_axis_label if it exists
- if x_axis_label:
- return x_axis_label
-
- # Strategy 3: Find any datetime column in the datasource
- if hasattr(self._qc_datasource, "columns"):
- for col in self._qc_datasource.columns:
- if hasattr(col, "is_dttm") and col.is_dttm:
- if hasattr(col, "column_name"):
- return col.column_name
- elif hasattr(col, "name"):
- return col.name
-
- return None
-
- def _process_date_range_offset(
- self, offset_df: pd.DataFrame, join_keys: list[str]
- ) -> tuple[pd.DataFrame, list[str]]:
- """Process date range offset data and return modified DataFrame and
keys."""
- temporal_cols = ["ds", "__timestamp", "dttm"]
- non_temporal_join_keys = [key for key in join_keys if key not in temporal_cols]
-
- if non_temporal_join_keys:
- return offset_df, non_temporal_join_keys
-
- metric_columns = [col for col in offset_df.columns if col not in temporal_cols]
-
- if metric_columns:
- aggregated_values = {}
- for col in metric_columns:
- if pd.api.types.is_numeric_dtype(offset_df[col]):
- aggregated_values[col] = offset_df[col].sum()
- else:
- aggregated_values[col] = (
- offset_df[col].iloc[0] if not offset_df.empty else None
- )
-
- offset_df = pd.DataFrame([aggregated_values])
-
- return offset_df, []
-
- def _apply_cleanup_logic(
- self,
- df: pd.DataFrame,
- offset: str,
- time_grain: str | None,
- join_keys: list[str],
- is_date_range_offset: bool,
- ) -> pd.DataFrame:
- """Apply appropriate cleanup logic based on offset type."""
- if time_grain and not is_date_range_offset:
- if join_keys:
- col = df.pop(join_keys[0])
- df.insert(0, col.name, col)
-
- df.drop(
- list(df.filter(regex=f"{OFFSET_JOIN_COLUMN_SUFFIX}|{R_SUFFIX}")),
- axis=1,
- inplace=True,
- )
- elif is_date_range_offset:
- df.drop(
- list(df.filter(regex=f"{R_SUFFIX}")),
- axis=1,
- inplace=True,
- )
- else:
- df.drop(
- list(df.filter(regex=f"{R_SUFFIX}")),
- axis=1,
- inplace=True,
- )
-
- return df
-
- def _determine_join_keys(
- self,
- df: pd.DataFrame,
- offset_df: pd.DataFrame,
- offset: str,
- time_grain: str | None,
- join_keys: list[str],
- is_date_range_offset: bool,
- join_column_producer: Any,
- ) -> tuple[pd.DataFrame, list[str]]:
- """Determine appropriate join keys and modify DataFrames if needed."""
- if time_grain and not is_date_range_offset:
- column_name = OFFSET_JOIN_COLUMN_SUFFIX + offset
-
- # Add offset join columns for relative time offsets
- self.add_offset_join_column(
- df, column_name, time_grain, offset, join_column_producer
- )
- self.add_offset_join_column(
- offset_df, column_name, time_grain, None, join_column_producer
- )
- return offset_df, [column_name, *join_keys[1:]]
-
- elif is_date_range_offset:
- return self._process_date_range_offset(offset_df, join_keys)
-
- else:
- return offset_df, join_keys
-
- def _perform_join(
- self, df: pd.DataFrame, offset_df: pd.DataFrame, actual_join_keys: list[str]
- ) -> pd.DataFrame:
- """Perform the appropriate join operation."""
- if actual_join_keys:
- return dataframe_utils.left_join_df(
- left_df=df,
- right_df=offset_df,
- join_keys=actual_join_keys,
- rsuffix=R_SUFFIX,
- )
- else:
- temp_key = "__temp_join_key__"
- df[temp_key] = 1
- offset_df[temp_key] = 1
-
- result_df = dataframe_utils.left_join_df(
- left_df=df,
- right_df=offset_df,
- join_keys=[temp_key],
- rsuffix=R_SUFFIX,
- )
-
- # Remove temporary join keys
- result_df.drop(columns=[temp_key], inplace=True, errors="ignore")
- result_df.drop(
- columns=[f"{temp_key}{R_SUFFIX}"], inplace=True,
errors="ignore"
- )
- return result_df
-
- def join_offset_dfs(
- self,
- df: pd.DataFrame,
- offset_dfs: dict[str, pd.DataFrame],
- time_grain: str | None,
- join_keys: list[str],
- ) -> pd.DataFrame:
- """
- Join offset DataFrames with the main DataFrame.
-
- :param df: The main DataFrame.
- :param offset_dfs: A list of offset DataFrames.
- :param time_grain: The time grain used to calculate the temporal join key.
- :param join_keys: The keys to join on.
- """
- join_column_producer = current_app.config[
- "TIME_GRAIN_JOIN_COLUMN_PRODUCERS"
- ].get(time_grain)
-
- if join_column_producer and not time_grain:
- raise QueryObjectValidationError(
- _("Time Grain must be specified when using Time Shift.")
- )
-
- for offset, offset_df in offset_dfs.items():
- is_date_range_offset = self.is_valid_date_range(
- offset
- ) and feature_flag_manager.is_feature_enabled(
- "DATE_RANGE_TIMESHIFTS_ENABLED"
- )
-
- offset_df, actual_join_keys = self._determine_join_keys(
- df,
- offset_df,
- offset,
- time_grain,
- join_keys,
- is_date_range_offset,
- join_column_producer,
- )
-
- df = self._perform_join(df, offset_df, actual_join_keys)
- df = self._apply_cleanup_logic(
- df, offset, time_grain, join_keys, is_date_range_offset
- )
-
- return df
-
- @staticmethod
- def generate_join_column(
- row: pd.Series,
- column_index: int,
- time_grain: str,
- time_offset: str | None = None,
- ) -> str:
- value = row[column_index]
-
- if hasattr(value, "strftime"):
- if time_offset and not QueryContextProcessor.is_valid_date_range_static(
- time_offset
- ):
- value = value + DateOffset(**normalize_time_delta(time_offset))
-
- if time_grain in (
- TimeGrain.WEEK_STARTING_SUNDAY,
- TimeGrain.WEEK_ENDING_SATURDAY,
- ):
- return value.strftime("%Y-W%U")
-
- if time_grain in (
- TimeGrain.WEEK,
- TimeGrain.WEEK_STARTING_MONDAY,
- TimeGrain.WEEK_ENDING_SUNDAY,
- ):
- return value.strftime("%Y-W%W")
-
- if time_grain == TimeGrain.MONTH:
- return value.strftime("%Y-%m")
-
- if time_grain == TimeGrain.QUARTER:
- return value.strftime("%Y-Q") + str(value.quarter)
-
- if time_grain == TimeGrain.YEAR:
- return value.strftime("%Y")
-
- return str(value)
-
- @staticmethod
- def is_valid_date_range_static(date_range: str) -> bool:
- """Static version of is_valid_date_range for use in static methods"""
- try:
- # Attempt to parse the string as a date range in the format
- # YYYY-MM-DD:YYYY-MM-DD
- start_date, end_date = date_range.split(":")
- datetime.strptime(start_date.strip(), "%Y-%m-%d")
- datetime.strptime(end_date.strip(), "%Y-%m-%d")
- return True
- except ValueError:
- # If parsing fails, it's not a valid date range in the format
- # YYYY-MM-DD:YYYY-MM-DD
- return False
+ return self._qc_datasource.get_query_result(query_object)
Review Comment:
### Missing method validation for datasource delegation
<details>
<summary>Tell me more</summary>
###### What is the issue?
The method now delegates all query processing to the datasource, but there is
no guarantee that all datasource implementations have a `get_query_result`
method that accepts a `QueryObject` parameter.
###### Why this matters
This could raise `AttributeError` at runtime for any datasource that does not
implement the method, breaking existing functionality for datasources that
previously relied on `QueryContextProcessor`'s own implementation.
###### Suggested change ∙ *Feature Preview*
Add a check to ensure the datasource has the required method, or provide a
fallback implementation:
```python
def get_query_result(self, query_object: QueryObject) -> QueryResult:
    """
    Returns a pandas dataframe based on the query object.

    This method delegates to the datasource's get_query_result method,
    which handles query execution, normalization, time offsets, and
    post-processing.
    """
    if hasattr(self._qc_datasource, 'get_query_result'):
        return self._qc_datasource.get_query_result(query_object)
    else:
        raise NotImplementedError(
            f"Datasource {type(self._qc_datasource).__name__} does not "
            "implement the get_query_result method"
        )
```
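Alternatively, the expected contract could be expressed as a `typing.Protocol`, which makes the expectation checkable statically as well as at runtime. The following is only a sketch; the `SupportsQueryResult` protocol and the standalone `delegate_query` helper are hypothetical names, not part of Superset's API:
```python
from typing import Any, Protocol, runtime_checkable


@runtime_checkable
class SupportsQueryResult(Protocol):
    """Hypothetical protocol describing the delegation contract."""

    def get_query_result(self, query_object: Any) -> Any: ...


def delegate_query(datasource: Any, query_object: Any) -> Any:
    # With @runtime_checkable, isinstance() verifies that the object
    # exposes a get_query_result attribute before we call it.
    if not isinstance(datasource, SupportsQueryResult):
        raise NotImplementedError(
            f"Datasource {type(datasource).__name__} does not "
            "implement the get_query_result method"
        )
    return datasource.get_query_result(query_object)
```
With a protocol in place, mypy can flag non-conforming datasource classes during review instead of relying solely on a runtime `hasattr` check.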
</details>
<sub>
💬 Looking for more details? Reply to this comment to chat with Korbit.
</sub>
##########
superset/common/query_context.py:
##########
@@ -22,10 +22,7 @@
import pandas as pd
from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType
-from superset.common.query_context_processor import (
- CachedTimeOffset,
- QueryContextProcessor,
-)
+from superset.common.query_context_processor import QueryContextProcessor
Review Comment:
### Removed public method breaks API compatibility
<details>
<summary>Tell me more</summary>
###### What is the issue?
The removal of the `processing_time_offsets` method from the `QueryContext`
class breaks the public API without providing an alternative access path,
potentially causing `AttributeError` exceptions for existing code that calls
this method.
###### Why this matters
Any existing code that calls `query_context.processing_time_offsets(df, query_object)`
will fail with an `AttributeError` at runtime, breaking backward
compatibility and potentially causing application crashes.
###### Suggested change ∙ *Feature Preview*
Either restore the method as a delegation to the processor or provide a
deprecation warning. If the method should remain accessible, add it back:
```python
def processing_time_offsets(
    self,
    df: pd.DataFrame,
    query_object: QueryObject,
) -> CachedTimeOffset:
    return self._processor.processing_time_offsets(df, query_object)
```
Also restore the import:
```python
from superset.common.query_context_processor import (
    CachedTimeOffset,
    QueryContextProcessor,
)
```
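If the longer-term plan is to remove the method, the deprecation-warning option mentioned above could look roughly like the sketch below. It assumes the same `self._processor` delegation shown in the first snippet, and the warning text is illustrative:
```python
import warnings


def processing_time_offsets(
    self,
    df: pd.DataFrame,
    query_object: QueryObject,
) -> CachedTimeOffset:
    # Hypothetical sketch: warn callers that this public entry point is
    # going away while still delegating for a transition period.
    warnings.warn(
        "QueryContext.processing_time_offsets is deprecated; "
        "use QueryContextProcessor.processing_time_offsets instead.",
        DeprecationWarning,
        stacklevel=2,
    )
    return self._processor.processing_time_offsets(df, query_object)
```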
</details>
<sub>
💬 Looking for more details? Reply to this comment to chat with Korbit.
</sub>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]