[GitHub] [superset] zhaoyongjie commented on a diff in pull request #20683: feat: the samples endpoint supports filters and pagination

GitBox Thu, 14 Jul 2022 04:34:58 -0700


zhaoyongjie commented on code in PR #20683:
URL: https://github.com/apache/superset/pull/20683#discussion_r921055103



##########
superset/datasets/commands/samples.py:
##########
@@ -30,40 +29,79 @@
     DatasetSamplesFailedError,
 )
 from superset.datasets.dao import DatasetDAO
+from superset.datasets.schemas import DatasetSamplesQuerySchema
 from superset.exceptions import SupersetSecurityException
-from superset.utils.core import QueryStatus
-
-logger = logging.getLogger(__name__)
+from superset.utils.core import DatasourceDict, QueryStatus
 
 
 class SamplesDatasetCommand(BaseCommand):
-    def __init__(self, model_id: int, force: bool):
+    def __init__(
+        self,
+        model_id: int,
+        force: bool,
+        *,
+        payload: Optional[DatasetSamplesQuerySchema] = None,
+        page: Optional[int] = None,
+        per_page: Optional[int] = None,
+    ):
         self._model_id = model_id
         self._force = force
         self._model: Optional[SqlaTable] = None
+        self._payload = payload
+        self._page = page
+        self._per_page = per_page
 
     def run(self) -> Dict[str, Any]:
         self.validate()
-        if not self._model:
-            raise DatasetNotFoundError()
+        limit_clause = self.get_limit_clause(self._page, self._per_page)
+        self._model = cast(SqlaTable, self._model)
+        datasource: DatasourceDict = {
+            "type": self._model.type,
+            "id": self._model.id,
+        }
 
-        qc_instance = QueryContextFactory().create(
-            datasource={
-                "type": self._model.type,
-                "id": self._model.id,
-            },
-            queries=[{}],
+        # constructing samples query
+        samples_instance = QueryContextFactory().create(
+            datasource=datasource,
+            queries=[
+                {**self._payload, **limit_clause} if self._payload else limit_clause
+            ],
             result_type=ChartDataResultType.SAMPLES,
             force=self._force,
         )
-        results = qc_instance.get_payload()
+
+        # constructing count(*) query
+        count_star_payload = {
+            "metrics": [
+                {
+                    "expressionType": "SQL",
+                    "sqlExpression": "COUNT(*)",
+                    "label": "COUNT(*)",
+                }
+            ]
+        }
+        count_star_instance = QueryContextFactory().create(
+            datasource=datasource,
+            queries=[count_star_payload],
+            result_type=ChartDataResultType.FULL,
+            force=self._force,
+        )
+        samples_results = samples_instance.get_payload()
+        count_star_results = count_star_instance.get_payload()
+
         try:
-            sample_data = results["queries"][0]
-            error_msg = sample_data.get("error")
-            if sample_data.get("status") == QueryStatus.FAILED and error_msg:
+            sample_data = samples_results["queries"][0]
+            count_star_data = count_star_results["queries"][0]
+            failed_status = (
+                sample_data.get("status") == QueryStatus.FAILED
+                or count_star_data.get("status") == QueryStatus.FAILED
+            )
+            error_msg = sample_data.get("error") or count_star_data.get("error")
+            if failed_status and error_msg:
                 cache_key = sample_data.get("cache_key")
                 QueryCacheManager.delete(cache_key, region=CacheRegion.DATA)
                 raise DatasetSamplesFailedError(error_msg)
+            sample_data["dataset_count_star"] = count_star_data["data"][0]["COUNT(*)"]

Review Comment:
   The response of the `samples` endpoint now includes a new field,
   `dataset_count_star`, which represents the size of the data in the dataset
   (the result of a `COUNT(*)` query).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [superset] zhaoyongjie commented on a diff in pull request #20683: feat: the samples endpoint supports filters and pagination

Reply via email to