splasky commented on code in PR #37677:
URL: https://github.com/apache/superset/pull/37677#discussion_r2774203148


##########
superset/db_engine_specs/datastore.py:
##########
@@ -0,0 +1,611 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import logging
+import re
+from datetime import datetime
+from re import Pattern
+from typing import Any, TYPE_CHECKING, TypedDict
+from urllib import parse
+
+from apispec import APISpec
+from apispec.ext.marshmallow import MarshmallowPlugin
+from flask_babel import gettext as __
+from marshmallow import fields, Schema
+from marshmallow.exceptions import ValidationError
+from sqlalchemy import column, types
+from sqlalchemy.engine.base import Engine
+from sqlalchemy.engine.reflection import Inspector
+from sqlalchemy.engine.url import URL
+from sqlalchemy.sql import sqltypes
+
+from superset.constants import TimeGrain
+from superset.databases.schemas import encrypted_field_properties, 
EncryptedString
+from superset.databases.utils import make_url_safe
+from superset.db_engine_specs.base import (
+    BaseEngineSpec,
+    BasicPropertiesType,
+    DatabaseCategory,
+)
+from superset.db_engine_specs.exceptions import SupersetDBAPIConnectionError
+from superset.errors import SupersetError, SupersetErrorType
+from superset.exceptions import SupersetException
+from superset.models.core import Database
+from superset.sql.parse import LimitMethod, Table
+from superset.superset_typing import ResultSetColumnType
+from superset.utils import json
+from superset.utils.hashing import hash_from_str
+
+logger = logging.getLogger(__name__)
+
+try:
+    import google.auth
+    from google.cloud import datastore
+    from google.oauth2 import service_account
+
+    dependencies_installed = True
+except ImportError:
+    dependencies_installed = False
+
+if TYPE_CHECKING:
+    from superset.models.core import Database  # pragma: no cover
+    from superset.models.sql_lab import Query  # pragma: no cover
+
+logger = logging.getLogger()
+
+CONNECTION_DATABASE_PERMISSIONS_REGEX = re.compile(
+    "Access Denied: Project (?P<project_name>.+?): User does not have "
+    + "datastore.databases.create permission in project (?P<project>.+?)"
+)
+
+TABLE_DOES_NOT_EXIST_REGEX = re.compile(
+    'Table name "(?P<table>.*?)" missing dataset while no default '
+    "dataset is set in the request"
+)
+
+COLUMN_DOES_NOT_EXIST_REGEX = re.compile(
+    r"Unrecognized name: (?P<column>.*?) at \[(?P<location>.+?)\]"
+)
+
+SCHEMA_DOES_NOT_EXIST_REGEX = re.compile(
+    r"datastore error: 404 Not found: Dataset (?P<dataset>.*?):"
+    r"(?P<schema>.*?) was not found in location"
+)
+
+SYNTAX_ERROR_REGEX = re.compile(
+    'Syntax error: Expected end of input but got identifier 
"(?P<syntax_error>.+?)"'
+)
+
+ma_plugin = MarshmallowPlugin()
+
+
+class DatastoreParametersSchema(Schema):
+    credentials_info = EncryptedString(
+        required=False,
+        metadata={"description": "Contents of Datastore JSON credentials."},
+    )
+    query = fields.Dict(required=False)
+
+
+class DatastoreParametersType(TypedDict):
+    credentials_info: dict[str, Any]
+    query: dict[str, Any]
+
+
+class DatastoreEngineSpec(BaseEngineSpec):  # pylint: 
disable=too-many-public-methods
+    """Engine spec for Google's Datastore
+
+    As contributed by @hychang.1997.tw"""
+
+    engine = "datastore"
+    engine_name = "Google Datastore"
+    max_column_name_length = 128
+    disable_ssh_tunneling = True
+
+    parameters_schema = DatastoreParametersSchema()
+    default_driver = "datastore"
+    sqlalchemy_uri_placeholder = "datastore://{project_id}"
+
+    # Use FETCH_MANY to prevent Superset from injecting LIMIT via sqlglot AST
+    # manipulation. GQL queries should not be modified by sqlglot since it
+    # uses BigQuery dialect which transforms GQL-incompatible syntax.
+    limit_method = LimitMethod.FETCH_MANY
+
+    metadata = {
+        "description": (
+            "Google Cloud Datastore is a highly scalable NoSQL database "
+            "for your applications."
+        ),
+        "logo": "google-bigquery.png",
+        "homepage_url": "https://cloud.google.com/datastore/",
+        "categories": [
+            DatabaseCategory.CLOUD_GCP,
+            DatabaseCategory.PROPRIETARY,
+        ],
+        "pypi_packages": ["python-datastore-sqlalchemy"],
+        "connection_string": "datastore://{project_id}",
+        "authentication_methods": [
+            {
+                "name": "Service Account JSON",
+                "description": (
+                    "Upload service account credentials JSON or paste in 
Secure Extra"
+                ),
+                "secure_extra": {
+                    "credentials_info": {
+                        "type": "service_account",
+                        "project_id": "...",
+                        "private_key_id": "...",
+                        "private_key": "...",
+                        "client_email": "...",
+                        "client_id": "...",
+                        "auth_uri": "...",
+                        "token_uri": "...",
+                    }
+                },
+            },
+        ],
+        "notes": (
+            "Create a Service Account via GCP console with access to "
+            "datastore datasets."
+        ),
+        "docs_url": "https://github.com/splasky/Python-datastore-sqlalchemy",
+    }
+
+    # Datastore doesn't maintain context when running multiple statements in 
the
+    # same cursor, so we need to run all statements at once
+    run_multiple_statements_as_one = True
+
+    allows_hidden_cc_in_orderby = True
+
+    supports_dynamic_schema = True
+    supports_catalog = supports_dynamic_catalog = 
supports_cross_catalog_queries = True
+
+    # when editing the database, mask this field in `encrypted_extra`
+    # pylint: disable=invalid-name
+    encrypted_extra_sensitive_fields = {"$.credentials_info.private_key"}
+
+    """
+    https://www.python.org/dev/peps/pep-0249/#arraysize
+    raw_connections bypass the sqlalchemy-datastore query execution context 
and deal
+    with raw dbapi connection directly.
+    If this value is not set, the default value is set to 1.
+    """
+    arraysize = 5000
+
+    _date_trunc_functions = {
+        "DATE": "DATE_TRUNC",
+        "DATETIME": "DATETIME_TRUNC",
+        "TIME": "TIME_TRUNC",
+        "TIMESTAMP": "TIMESTAMP_TRUNC",
+    }
+
+    _time_grain_expressions = {
+        None: "{col}",
+        TimeGrain.SECOND: "CAST(TIMESTAMP_SECONDS("
+        "UNIX_SECONDS(CAST({col} AS TIMESTAMP))"
+        ") AS {type})",
+        TimeGrain.MINUTE: "CAST(TIMESTAMP_SECONDS("
+        "60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 60)"
+        ") AS {type})",
+        TimeGrain.FIVE_MINUTES: "CAST(TIMESTAMP_SECONDS("
+        "5*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 5*60)"
+        ") AS {type})",
+        TimeGrain.TEN_MINUTES: "CAST(TIMESTAMP_SECONDS("
+        "10*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 10*60)"
+        ") AS {type})",
+        TimeGrain.FIFTEEN_MINUTES: "CAST(TIMESTAMP_SECONDS("
+        "15*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 15*60)"
+        ") AS {type})",
+        TimeGrain.THIRTY_MINUTES: "CAST(TIMESTAMP_SECONDS("
+        "30*60 * DIV(UNIX_SECONDS(CAST({col} AS TIMESTAMP)), 30*60)"
+        ") AS {type})",
+        TimeGrain.HOUR: "{func}({col}, HOUR)",
+        TimeGrain.DAY: "{func}({col}, DAY)",
+        TimeGrain.WEEK: "{func}({col}, WEEK)",
+        TimeGrain.WEEK_STARTING_MONDAY: "{func}({col}, ISOWEEK)",
+        TimeGrain.MONTH: "{func}({col}, MONTH)",
+        TimeGrain.QUARTER: "{func}({col}, QUARTER)",
+        TimeGrain.YEAR: "{func}({col}, YEAR)",
+    }
+
+    custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, 
Any]]] = {
+        CONNECTION_DATABASE_PERMISSIONS_REGEX: (
+            __(
+                "Unable to connect. Verify that the following roles are set "
+                'on the service account: "Cloud Datastore Viewer", '
+                '"Cloud Datastore User", "Cloud Datastore Creator"'
+            ),
+            SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
+            {},
+        ),
+        TABLE_DOES_NOT_EXIST_REGEX: (
+            __(
+                'The table "%(table)s" does not exist. '
+                "A valid table must be used to run this query.",
+            ),
+            SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
+            {},
+        ),
+        COLUMN_DOES_NOT_EXIST_REGEX: (
+            __('We can\'t seem to resolve column "%(column)s" at line 
%(location)s.'),
+            SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
+            {},
+        ),
+        SCHEMA_DOES_NOT_EXIST_REGEX: (
+            __(
+                'The schema "%(schema)s" does not exist. '
+                "A valid schema must be used to run this query."
+            ),
+            SupersetErrorType.SCHEMA_DOES_NOT_EXIST_ERROR,
+            {},
+        ),
+        SYNTAX_ERROR_REGEX: (
+            __(
+                "Please check your query for syntax errors at or near "
+                '"%(syntax_error)s". Then, try running your query again.'
+            ),
+            SupersetErrorType.SYNTAX_ERROR,
+            {},
+        ),
+    }
+
+    @staticmethod
+    def _mutate_label(label: str) -> str:
+        """
+        Datastore field_name should start with a letter or underscore and 
contain
+        only alphanumeric characters. Labels that start with a number are 
prefixed
+        with an underscore. Any unsupported characters are replaced with 
underscores
+        and an md5 hash is added to the end of the label to avoid possible
+        collisions.
+
+        :param label: Expected expression label
+        :return: Conditionally mutated label
+        """
+        label_hashed = "_" + hash_from_str(label)
+
+        # if label starts with number, add underscore as first character
+        label_mutated = "_" + label if re.match(r"^\d", label) else label
+
+        # replace non-alphanumeric characters with underscores
+        label_mutated = re.sub(r"[^\w]+", "_", label_mutated)
+        if label_mutated != label:
+            # add first 5 chars from md5 hash to label to avoid possible 
collisions
+            label_mutated += label_hashed[:6]
+
+        return label_mutated
+
+    @classmethod
+    def _truncate_label(cls, label: str) -> str:
+        """Datastore requires column names start with either a letter or
+        underscore. To make sure this is always the case, an underscore is 
prefixed
+        to the md5 hash of the original label.
+
+        :param label: expected expression label
+        :return: truncated label
+        """
+        return "_" + hash_from_str(label)
+
+    @classmethod
+    def convert_dttm(
+        cls, target_type: str, dttm: datetime, db_extra: dict[str, Any] | None 
= None
+    ) -> str | None:
+        sqla_type = cls.get_sqla_column_type(target_type)
+        if isinstance(sqla_type, types.Date):
+            return f"CAST('{dttm.date().isoformat()}' AS DATE)"
+        if isinstance(sqla_type, types.TIMESTAMP):
+            return f"""CAST('{dttm.isoformat(timespec="microseconds")}' AS 
TIMESTAMP)"""
+        if isinstance(sqla_type, types.DateTime):
+            return f"""CAST('{dttm.isoformat(timespec="microseconds")}' AS 
DATETIME)"""
+        if isinstance(sqla_type, types.Time):
+            return f"""CAST('{dttm.strftime("%H:%M:%S.%f")}' AS TIME)"""
+        return None
+
+    @classmethod
+    def fetch_data(cls, cursor: Any, limit: int | None = None) -> 
list[tuple[Any, ...]]:
+        data = super().fetch_data(cursor, limit)
+        # Support type Datastore Row, introduced here PR #4071
+        # google.cloud.datastore.table.Row
+        if data and type(data[0]).__name__ == "Row":
+            data = [r.values() for r in data]  # type: ignore
+        return data
+
+    @classmethod
+    def _get_client(cls, engine: Engine, database: Database) -> 
datastore.Client:
+        """
+        Return the Datastore client associated with an engine.
+        """
+        if not dependencies_installed:
+            raise SupersetException(
+                "Could not import libraries needed to connect to Datastore."
+            )
+
+        if credentials_info := engine.dialect.credentials_info:
+            credentials = 
service_account.Credentials.from_service_account_info(
+                credentials_info
+            )
+            return datastore.Client(credentials=credentials)
+
+        try:
+            credentials = google.auth.default()[0]
+            return datastore.Client(credentials=credentials, database=database)

Review Comment:
   Hi. Since google-cloud-datastore version >= 2.21.0, the Datastore client 
API supports a `database` argument in its constructor. See 
https://github.com/googleapis/python-datastore/blob/e2f4d136a5a8d250c5aa909b95e184223b8024c2/google/cloud/datastore/client.py#L247 
and the reference documentation: 
https://docs.cloud.google.com/python/docs/reference/datastore/latest/client#class-googleclouddatastoreclientclientprojectnone-namespacenone-credentialsnone-clientinfogoogleapicoregapicv1clientinfoclientinfo-object-clientoptionsnone-databasenone-httpnone-usegrpcnone. 
Which version did you test against?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to