dpgaspar commented on a change in pull request #14225: URL: https://github.com/apache/superset/pull/14225#discussion_r619785418
########## File path: superset/db_engine_specs/superset.py ########## @@ -0,0 +1,330 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import datetime +import operator +import urllib.parse +from functools import wraps +from typing import ( + Any, + Callable, + cast, + Dict, + Iterator, + List, + Optional, + Tuple, + Type, + TypeVar, +) + +from flask import g +from flask_login import current_user +from shillelagh.adapters.base import Adapter +from shillelagh.backends.apsw.dialect import APSWDialect +from shillelagh.exceptions import ProgrammingError +from shillelagh.fields import ( + Blob, + Boolean, + Date, + DateTime, + Field, + Float, + Integer, + Order, + String, + Time, +) +from shillelagh.filters import Equal, Filter, Range +from shillelagh.types import RequestedOrder, Row +from sqlalchemy import MetaData, Table +from sqlalchemy.engine.url import URL +from sqlalchemy.exc import NoSuchTableError +from sqlalchemy.pool.base import _ConnectionFairy +from sqlalchemy.sql import Select, select + +from superset import db, security_manager, sql_parse +from superset.db_engine_specs.sqlite import SqliteEngineSpec + + +class SupersetEngineSpec(SqliteEngineSpec): + """ + Internal engine for Superset + + This DB engine spec is a 
meta-database. It uses the shillelagh library + to build a DB that can operate across different Superset databases. + """ + + engine = "superset" + engine_name = "Superset" + + +# pylint: disable=abstract-method +class SupersetAPSWDialect(APSWDialect): + + """ + A SQLAlchemy dialect for an internal Superset engine. + + This dialect allows query to be executed across different Superset + databases. For example, to read data from the `birth_names` table in the + `examples` databases: + + >>> engine = create_engine('superset://') + >>> conn = engine.connect() + >>> results = conn.execute('SELECT * FROM "superset.examples.birth_names"') + + Queries can also join data across different Superset databases. + + The dialect is built in top of the shillelagh library, leveraging SQLite to + create virtual tables on-the-fly proxying Superset tables. The + `SupersetShillelaghAdapter` adapter is responsible for returning data when a + Superset table is accessed. + """ + + name = "superset" + + # pylint: disable=unused-argument + def create_connect_args(self, url: URL) -> Tuple[Tuple[()], Dict[str, Any]]: + return ( + (), + { + "path": ":memory:", + "adapters": ["superset"], + "adapter_args": {}, + "safe": True, + "isolation_level": self.isolation_level, + }, + ) + + # pylint: disable=unused-argument, no-self-use + def get_schema_names( + self, connection: _ConnectionFairy, **kwargs: Any + ) -> List[str]: + return [] + + +# pylint: disable=invalid-name +F = TypeVar("F", bound=Callable[..., Any]) + + +def check_dml(method: F) -> F: + @wraps(method) + def wrapper(self: "SupersetShillelaghAdapter", *args: Any, **kwargs: Any) -> Any: + # pylint: disable=protected-access + if not self._allow_dml: + raise ProgrammingError(f'DML not enabled in database "{self.database}"') + return method(self, *args, **kwargs) + + return cast(F, wrapper) + + +def has_rowid(method: F) -> F: + @wraps(method) + def wrapper(self: "SupersetShillelaghAdapter", *args: Any, **kwargs: Any) -> Any: + # pylint: 
disable=protected-access + if not self._rowid: + raise ProgrammingError( + "Can only modify data in a table with a single, integer, primary key" + ) + return method(self, *args, **kwargs) + + return cast(F, wrapper) + + +# pylint: disable=too-many-instance-attributes +class SupersetShillelaghAdapter(Adapter): + + """ + A shillelagh adapter for Superset tables. + + Shillelagh adapters are responsible for fetching data from a given resource, + allowing it to be represented as a virtual table in SQLite. This one works + as a proxy to Superset tables. + """ + + safe = True + + type_map: Dict[Any, Type[Field]] = { + bool: Boolean, + float: Float, + int: Integer, + str: String, + datetime.date: Date, + datetime.datetime: DateTime, + datetime.time: Time, + } + + @staticmethod + def supports(uri: str) -> bool: + # An URL for a table has the format superset.database[.catalog][.schema].table, + # eg, superset.examples.birth_names + parsed = urllib.parse.urlparse(uri) + parts = parsed.path.split(".") + return 3 <= len(parts) <= 5 and parts[0] == "superset" + + @staticmethod + def parse_uri(uri: str) -> Tuple[str, Optional[str], Optional[str], str]: + parsed = urllib.parse.urlparse(uri) + parts = parsed.path.split(".") + if len(parts) == 3: + return parts[1], None, None, parts[2] + if len(parts) == 4: + return parts[1], None, parts[2], parts[3] + return tuple(parts[1:]) # type: ignore + + def __init__( + self, database: str, catalog: Optional[str], schema: Optional[str], table: str, + ): + self.database = database + self.catalog = catalog + self.schema = schema + self.table = table + + self._rowid: Optional[str] = None + self._allow_dml: bool = False + self._set_columns() + + @classmethod + def get_field(cls, python_type: Any) -> Field: + class_ = cls.type_map.get(python_type, Blob) + return class_(filters=[Equal, Range], order=Order.ANY, exact=True) + + def _set_columns(self) -> None: + from superset.models.core import Database + + database = ( + 
db.session.query(Database).filter_by(database_name=self.database).first() + ) + if database is None: + raise ProgrammingError(f"Database not found: {self.database}") + self._allow_dml = database.allow_dml + + # verify permissions + g.user = current_user Review comment: Why do we need this? `g.user` is already set by FAB ########## File path: tests/db_engine_specs/superset_tests.py ########## @@ -0,0 +1,159 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import os +from unittest import mock + +import pytest +from sqlalchemy.engine import create_engine +from sqlalchemy.exc import ProgrammingError + +from superset import db, security_manager +from superset.exceptions import SupersetSecurityException +from superset.models.core import Database + +session = db.session + + +@pytest.fixture() +def database1(): + database = Database( + database_name="database1", + sqlalchemy_uri="sqlite:///database1.db", + allow_dml=True, + ) + session.add(database) + session.commit() + + yield database + + session.delete(database) + session.commit() + os.unlink("database1.db") + + +@pytest.fixture() +def table1(database1): + engine = database1.get_sqla_engine() + conn = engine.connect() + conn.execute("CREATE TABLE table1 (a INTEGER NOT NULL PRIMARY KEY, b INTEGER)") + conn.execute("INSERT INTO table1 (a, b) VALUES (1, 10), (2, 20)") + session.commit() + + yield + + conn.execute("DROP TABLE table1") + session.commit() + + +@pytest.fixture() +def database2(): + database = Database( + database_name="database2", + sqlalchemy_uri="sqlite:///database2.db", + allow_dml=False, + ) + session.add(database) + session.commit() + + yield database + + session.delete(database) + session.commit() + os.unlink("database2.db") + + +@pytest.fixture() +def table2(database2): + engine = database2.get_sqla_engine() + conn = engine.connect() + conn.execute("CREATE TABLE table2 (a INTEGER NOT NULL PRIMARY KEY, b TEXT)") + conn.execute("INSERT INTO table2 (a, b) VALUES (1, 'ten'), (2, 'twenty')") + session.commit() + + yield + + conn.execute("DROP TABLE table2") + session.commit() + + +@mock.patch("superset.security.manager.g") +def test_superset(g, app_context, table1): + g.user = security_manager.find_user("admin") + + engine = create_engine("superset://") + conn = engine.connect() + results = conn.execute('SELECT * FROM "superset.database1.table1"') + assert list(results) == [(1, 10), (2, 20)] + + +@mock.patch("superset.security.manager.g") 
+def test_superset_joins(g, app_context, table1, table2): + g.user = security_manager.find_user("admin") + + engine = create_engine("superset://") + conn = engine.connect() + results = conn.execute( + """ + SELECT t1.b, t2.b + FROM "superset.database1.table1" AS t1 + JOIN "superset.database2.table2" AS t2 + ON t1.a = t2.a + """ + ) + assert list(results) == [(10, "ten"), (20, "twenty")] + + +@mock.patch("superset.security.manager.g") +def test_dml(g, app_context, table1, table2): + g.user = security_manager.find_user("admin") + + engine = create_engine("superset://") + conn = engine.connect() + + conn.execute('INSERT INTO "superset.database1.table1" (a, b) VALUES (3, 30)') + results = conn.execute('SELECT * FROM "superset.database1.table1"') + assert list(results) == [(1, 10), (2, 20), (3, 30)] + conn.execute('UPDATE "superset.database1.table1" SET b=35 WHERE a=3') + results = conn.execute('SELECT * FROM "superset.database1.table1"') + assert list(results) == [(1, 10), (2, 20), (3, 35)] + conn.execute('DELETE FROM "superset.database1.table1" WHERE b>20') + results = conn.execute('SELECT * FROM "superset.database1.table1"') + assert list(results) == [(1, 10), (2, 20)] + + with pytest.raises(ProgrammingError) as excinfo: + conn.execute( + """INSERT INTO "superset.database2.table2" (a, b) VALUES (3, 'thirty')""" + ) + assert ( + str(excinfo.value).strip() + == '(shillelagh.exceptions.ProgrammingError) DML not enabled in database "database2"\n[SQL: INSERT INTO "superset.database2.table2" (a, b) VALUES (3, \'thirty\')]\n(Background on this error at: http://sqlalche.me/e/13/f405)' + ) + + +@mock.patch("superset.security.manager.g") +def test_security_manager(g, app_context, table1): + g.user = security_manager.find_user("gamma") + + engine = create_engine("superset://") + conn = engine.connect() + with pytest.raises(SupersetSecurityException) as excinfo: + conn.execute('SELECT * FROM "superset.database1.table1"') Review comment: We have database, schema and 
dataset level access. Can you add those test combinations also ########## File path: superset/db_engine_specs/superset.py ########## @@ -0,0 +1,330 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import datetime +import operator +import urllib.parse +from functools import wraps +from typing import ( + Any, + Callable, + cast, + Dict, + Iterator, + List, + Optional, + Tuple, + Type, + TypeVar, +) + +from flask import g +from flask_login import current_user +from shillelagh.adapters.base import Adapter +from shillelagh.backends.apsw.dialect import APSWDialect +from shillelagh.exceptions import ProgrammingError +from shillelagh.fields import ( + Blob, + Boolean, + Date, + DateTime, + Field, + Float, + Integer, + Order, + String, + Time, +) +from shillelagh.filters import Equal, Filter, Range +from shillelagh.types import RequestedOrder, Row +from sqlalchemy import MetaData, Table +from sqlalchemy.engine.url import URL +from sqlalchemy.exc import NoSuchTableError +from sqlalchemy.pool.base import _ConnectionFairy +from sqlalchemy.sql import Select, select + +from superset import db, security_manager, sql_parse +from superset.db_engine_specs.sqlite import SqliteEngineSpec + + +class SupersetEngineSpec(SqliteEngineSpec): + """ + 
Internal engine for Superset + + This DB engine spec is a meta-database. It uses the shillelagh library + to build a DB that can operate across different Superset databases. + """ + + engine = "superset" + engine_name = "Superset" + + +# pylint: disable=abstract-method +class SupersetAPSWDialect(APSWDialect): + + """ + A SQLAlchemy dialect for an internal Superset engine. + + This dialect allows query to be executed across different Superset Review comment: `This dialect allows queries to be executed across different Superset` better? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
