michael-s-molina commented on code in PR #28432:
URL: https://github.com/apache/superset/pull/28432#discussion_r1600452645


##########
superset/common/query_context_processor.py:
##########
@@ -519,51 +510,112 @@ def processing_time_offsets(  # pylint: disable=too-many-locals,too-many-stateme
                 datasource_uid=query_context.datasource.uid,
                 region=CacheRegion.DATA,
             )
-            offset_dfs.append(offset_metrics_df)
+            offset_dfs[offset] = offset_metrics_df
 
         if offset_dfs:
-            # iterate on offset_dfs, left join each with df
-            for offset_df in offset_dfs:
-                df = dataframe_utils.left_join_df(
-                    left_df=df,
-                    right_df=offset_df,
-                    join_keys=join_keys,
-                    rsuffix=R_SUFFIX,
-                )
+            df = self.join_offset_dfs(
+                df,
+                offset_dfs,
+                time_grain,
+                join_keys,
+            )
+
+        return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys)
+
+    def join_offset_dfs(
+        self,
+        df: pd.DataFrame,
+        offset_dfs: dict[str, pd.DataFrame],
+        time_grain: str,
+        join_keys: list[str],
+    ) -> pd.DataFrame:
+        """
+        Join offset DataFrames with the main DataFrame.
 
-        # removes columns used for join
-        df.drop(
-            list(df.filter(regex=f"{AGGREGATED_JOIN_COLUMN}|{R_SUFFIX}")),
-            axis=1,
-            inplace=True,
+        :param df: The main DataFrame.
+        :param offset_dfs: A dictionary mapping each offset to its DataFrame.
+        :param time_grain: The time grain used to calculate the temporal join key.
+        :param join_keys: The keys to join on.
+        """
+        join_column_producer = config["TIME_GRAIN_JOIN_COLUMN_PRODUCERS"].get(
+            time_grain
         )
 
-        return CachedTimeOffset(df=df, queries=queries, cache_keys=cache_keys)
+        # iterate on offset_dfs, left join each with df
+        for offset, offset_df in offset_dfs.items():
+            # defines a column name for the offset join column
+            column_name = OFFSET_JOIN_COLUMN_SUFFIX + offset
+
+            # add offset join column to df
+            self.add_offset_join_column(
+                df, column_name, time_grain, offset, join_column_producer
+            )
+
+            # add offset join column to offset_df
+            self.add_offset_join_column(
+                offset_df, column_name, time_grain, None, join_column_producer
+            )
+
+            # the temporal column is the first column in the join keys
+            # so we use the join column instead of the temporal column
+            actual_join_keys = [column_name, *join_keys[1:]]
+
+            # left join df with offset_df
+            df = dataframe_utils.left_join_df(
+                left_df=df,
+                right_df=offset_df,
+                join_keys=actual_join_keys,
+                rsuffix=R_SUFFIX,
+            )
+
+            # move the temporal column to the first column in df
+            col = df.pop(join_keys[0])

Review Comment:
   Yep



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to