Re: [PR] feat(sqllab): add latest partition support for BigQuery [superset]

via GitHub Wed, 27 Nov 2024 06:31:48 -0800


betodealmeida commented on code in PR #30760:
URL: https://github.com/apache/superset/pull/30760#discussion_r1860749718



##########
superset/db_engine_specs/bigquery.py:
##########
@@ -284,66 +290,90 @@ def _truncate_label(cls, label: str) -> str:
         return "_" + md5_sha_from_str(label)
 
     @classmethod
-    @deprecated(deprecated_in="3.0")
-    def normalize_indexes(cls, indexes: list[dict[str, Any]]) -> 
list[dict[str, Any]]:
-        """
-        Normalizes indexes for more consistency across db engines
+    def where_latest_partition(
+        cls,
+        database: Database,
+        table: Table,
+        query: Select,
+        columns: list[ResultSetColumnType] | None = None,
+    ) -> Select | None:
+        if partition_column := cls.get_time_partition_column(database, table):
+            max_partition_id = cls.get_max_partition_id(database, table)
+            query = query.where(
+                column(partition_column) == func.PARSE_DATE("%Y%m%d", 
max_partition_id)
+            )
 
-        :param indexes: Raw indexes as returned by SQLAlchemy
-        :return: cleaner, more aligned index definition
-        """
-        normalized_idxs = []
-        # Fixing a bug/behavior observed in pybigquery==0.4.15 where
-        # the index's `column_names` == [None]
-        # Here we're returning only non-None indexes
-        for ix in indexes:
-            column_names = ix.get("column_names") or []
-            ix["column_names"] = [col for col in column_names if col is not 
None]
-            if ix["column_names"]:
-                normalized_idxs.append(ix)
-        return normalized_idxs
+        return query
 
     @classmethod
-    def get_indexes(
+    def get_max_partition_id(
         cls,
         database: Database,
-        inspector: Inspector,
         table: Table,
-    ) -> list[dict[str, Any]]:
-        """
-        Get the indexes associated with the specified schema/table.
+    ) -> Select | None:
+        sql = dedent(f"""\
+            SELECT
+                MAX(partition_id) AS max_partition_id
+            FROM `{table.schema}.INFORMATION_SCHEMA.PARTITIONS`
+            WHERE table_name = '{table.table}'
+        """)
+        df = database.get_df(sql)

Review Comment:
   I think we need a standard method in the DB engine spec for running SQL.
   Right now it looks like some engine specs use `engine.execute`, some use
   `cursor.execute`, some use `inspector.bind.execute`, and others use
   `database.get_df`, so there is no single, consistent entry point.
   
   We need something where `catalog` and `schema` are required parameters
   (their values can still be `None`), to prevent people from forgetting to
   pass them:
   
   ```python
   class BaseEngineSpec:
       def execute_sql(self, sql: str, catalog: str | None, schema: str | None):
           ...
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscr...@superset.apache.org
For additional commands, e-mail: notifications-h...@superset.apache.org

Re: [PR] feat(sqllab): add latest partition support for BigQuery [superset]

Reply via email to