This is an automated email from the ASF dual-hosted git repository.
johnbodley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git
The following commit(s) were added to refs/heads/master by this push:
new 9f7bd1e63f fix(presto/trino): Ensure get_table_names only returns real
tables (#21794)
9f7bd1e63f is described below
commit 9f7bd1e63fbd4084b1dd1ad9b1dd718ff43c7e7c
Author: John Bodley <[email protected]>
AuthorDate: Wed Nov 9 14:30:49 2022 -0800
fix(presto/trino): Ensure get_table_names only returns real tables (#21794)
---
UPDATING.md | 1 +
requirements/base.txt | 2 -
requirements/docker.txt | 2 +
requirements/testing.txt | 2 +-
setup.py | 2 +-
superset/databases/api.py | 4 +-
superset/db_engine_specs/base.py | 28 +++---
superset/db_engine_specs/presto.py | 82 ++++++++++++-----
.../db_engine_specs/hive_tests.py | 4 -
.../db_engine_specs/presto_tests.py | 100 +++++++--------------
10 files changed, 118 insertions(+), 109 deletions(-)
diff --git a/UPDATING.md b/UPDATING.md
index ec3a15a157..4ca468ecbc 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -31,6 +31,7 @@ assists people when migrating to a new version.
- [21002](https://github.com/apache/superset/pull/21002): Support Python 3.10
and bump pandas 1.4 and pyarrow 6.
- [21163](https://github.com/apache/superset/pull/21163): When
`GENERIC_CHART_AXES` feature flags set to `True`, the Time Grain control will
move below the X-Axis control.
- [21284](https://github.com/apache/superset/pull/21284): The non-functional
`MAX_TABLE_NAMES` config key has been removed.
+- [21794](https://github.com/apache/superset/pull/21794): Deprecates the
undocumented `PRESTO_SPLIT_VIEWS_FROM_TABLES` feature flag. Now for Presto,
like other engines, only physical tables are treated as tables.
### Breaking Changes
diff --git a/requirements/base.txt b/requirements/base.txt
index 8387d380bc..3646395b4f 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -108,8 +108,6 @@ geopy==2.2.0
# via apache-superset
graphlib-backport==1.0.3
# via apache-superset
-greenlet==1.1.2
- # via sqlalchemy
gunicorn==20.1.0
# via apache-superset
hashids==1.3.1
diff --git a/requirements/docker.txt b/requirements/docker.txt
index 0c2d36159e..307064dbde 100644
--- a/requirements/docker.txt
+++ b/requirements/docker.txt
@@ -12,6 +12,8 @@
# -r requirements/docker.in
gevent==21.8.0
# via -r requirements/docker.in
+greenlet==1.1.3.post0
+ # via gevent
psycopg2-binary==2.9.1
# via apache-superset
zope-event==4.5.0
diff --git a/requirements/testing.txt b/requirements/testing.txt
index 191fa99c07..8068f3718c 100644
--- a/requirements/testing.txt
+++ b/requirements/testing.txt
@@ -130,7 +130,7 @@ rsa==4.7.2
# via google-auth
statsd==3.3.0
# via -r requirements/testing.in
-trino==0.315.0
+trino==0.319.0
# via apache-superset
typing-inspect==0.7.1
# via libcst
diff --git a/setup.py b/setup.py
index a3c7407f96..4ca6b91086 100644
--- a/setup.py
+++ b/setup.py
@@ -160,7 +160,7 @@ setup(
"pinot": ["pinotdb>=0.3.3, <0.4"],
"postgres": ["psycopg2-binary==2.9.1"],
"presto": ["pyhive[presto]>=0.6.5"],
- "trino": ["trino>=0.313.0"],
+ "trino": ["trino>=0.319.0"],
"prophet": ["prophet>=1.0.1, <1.1", "pystan<3.0"],
"redshift": ["sqlalchemy-redshift>=0.8.1, < 0.9"],
"rockset": ["rockset>=0.8.10, <0.9"],
diff --git a/superset/databases/api.py b/superset/databases/api.py
index c3efda9501..aced8e7c6f 100644
--- a/superset/databases/api.py
+++ b/superset/databases/api.py
@@ -19,7 +19,7 @@ import json
import logging
from datetime import datetime
from io import BytesIO
-from typing import Any, Dict, List, Optional
+from typing import Any, cast, Dict, List, Optional
from zipfile import is_zipfile, ZipFile
from flask import request, Response, send_file
@@ -611,7 +611,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
self.incr_stats("init", self.table_metadata.__name__)
parsed_schema = parse_js_uri_path_item(schema_name,
eval_undefined=True)
- table_name = parse_js_uri_path_item(table_name) # type: ignore
+ table_name = cast(str, parse_js_uri_path_item(table_name))
payload = database.db_engine_spec.extra_table_metadata(
database, table_name, parsed_schema
)
diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
index 55a026541a..2a1363e0b6 100644
--- a/superset/db_engine_specs/base.py
+++ b/superset/db_engine_specs/base.py
@@ -1018,13 +1018,17 @@ class BaseEngineSpec: # pylint:
disable=too-many-public-methods
schema: Optional[str],
) -> List[str]:
"""
- Get all tables from schema
+ Get all the real table names within the specified schema.
- :param database: The database to get info
- :param inspector: SqlAlchemy inspector
- :param schema: Schema to inspect. If omitted, uses default schema for
database
- :return: All tables in schema
+ Per the SQLAlchemy definition if the schema is omitted the database’s
default
+ schema is used, however some dialects infer the request as schema
agnostic.
+
+ :param database: The database to inspect
+ :param inspector: The SQLAlchemy inspector
+ :param schema: The schema to inspect
+ :returns: The physical table names
"""
+
try:
tables = inspector.get_table_names(schema)
except Exception as ex:
@@ -1042,13 +1046,17 @@ class BaseEngineSpec: # pylint:
disable=too-many-public-methods
schema: Optional[str],
) -> List[str]:
"""
- Get all views from schema
+ Get all the view names within the specified schema.
- :param database: The database to get info
- :param inspector: SqlAlchemy inspector
- :param schema: Schema name. If omitted, uses default schema for
database
- :return: All views in schema
+ Per the SQLAlchemy definition if the schema is omitted the database’s
default
+ schema is used, however some dialects infer the request as schema
agnostic.
+
+ :param database: The database to inspect
+ :param inspector: The SQLAlchemy inspector
+ :param schema: The schema to inspect
+ :returns: The view names
"""
+
try:
views = inspector.get_view_names(schema)
except Exception as ex:
diff --git a/superset/db_engine_specs/presto.py
b/superset/db_engine_specs/presto.py
index 22e4f7594c..e959eb2195 100644
--- a/superset/db_engine_specs/presto.py
+++ b/superset/db_engine_specs/presto.py
@@ -19,13 +19,13 @@ from __future__ import annotations
import logging
import re
-import textwrap
import time
from abc import ABCMeta
from collections import defaultdict, deque
from contextlib import closing
from datetime import datetime
from distutils.version import StrictVersion
+from textwrap import dedent
from typing import Any, cast, Dict, List, Optional, Pattern, Tuple,
TYPE_CHECKING, Union
from urllib import parse
@@ -392,46 +392,84 @@ class PrestoEngineSpec(PrestoBaseEngineSpec):
@classmethod
def get_table_names(
- cls, database: Database, inspector: Inspector, schema: Optional[str]
+ cls,
+ database: Database,
+ inspector: Inspector,
+ schema: Optional[str],
) -> List[str]:
- tables = super().get_table_names(database, inspector, schema)
- if not is_feature_enabled("PRESTO_SPLIT_VIEWS_FROM_TABLES"):
- return tables
+ """
+ Get all the real table names within the specified schema.
+
+ Per the SQLAlchemy definition if the schema is omitted the database’s
default
+ schema is used, however some dialects infer the request as schema
agnostic.
- views = set(cls.get_view_names(database, inspector, schema))
- actual_tables = set(tables) - views
- return list(actual_tables)
+ Note that PyHive's Hive and Presto SQLAlchemy dialects do not adhere
to the
+ specification where the `get_table_names` method returns both real
tables and
+ views. Futhermore the dialects wrongfully infer the request as schema
agnostic
+ when the schema is omitted.
+
+ :param database: The database to inspect
+ :param inspector: The SQLAlchemy inspector
+ :param schema: The schema to inspect
+ :returns: The physical table names
+ """
+
+ return sorted(
+ list(
+ set(super().get_table_names(database, inspector, schema))
+ - set(cls.get_view_names(database, inspector, schema))
+ )
+ )
@classmethod
def get_view_names(
- cls, database: Database, inspector: Inspector, schema: Optional[str]
+ cls,
+ database: Database,
+ inspector: Inspector,
+ schema: Optional[str],
) -> List[str]:
- """Returns an empty list
+ """
+ Get all the view names within the specified schema.
- get_table_names() function returns all table names and view names,
- and get_view_names() is not implemented in sqlalchemy_presto.py
-
https://github.com/dropbox/PyHive/blob/e25fc8440a0686bbb7a5db5de7cb1a77bdb4167a/pyhive/sqlalchemy_presto.py
+ Per the SQLAlchemy definition if the schema is omitted the database’s
default
+ schema is used, however some dialects infer the request as schema
agnostic.
+
+ Note that PyHive's Hive and Presto SQLAlchemy dialects do not
implement the
+ `get_view_names` method. To ensure consistency with the
`get_table_names` method
+ the request is deemed schema agnostic when the schema is omitted.
+
+ :param database: The database to inspect
+ :param inspector: The SQLAlchemy inspector
+ :param schema: The schema to inspect
+ :returns: The view names
"""
- if not is_feature_enabled("PRESTO_SPLIT_VIEWS_FROM_TABLES"):
- return []
if schema:
- sql = (
- "SELECT table_name FROM information_schema.views "
- "WHERE table_schema=%(schema)s"
- )
+ sql = dedent(
+ """
+ SELECT table_name FROM information_schema.tables
+ WHERE table_schema = %(schema)s
+ AND table_type = 'VIEW'
+ """
+ ).strip()
params = {"schema": schema}
else:
- sql = "SELECT table_name FROM information_schema.views"
+ sql = dedent(
+ """
+ SELECT table_name FROM information_schema.tables
+ WHERE table_type = 'VIEW'
+ """
+ ).strip()
params = {}
engine = cls.get_engine(database, schema=schema)
+
with closing(engine.raw_connection()) as conn:
cursor = conn.cursor()
cursor.execute(sql, params)
results = cursor.fetchall()
- return [row[0] for row in results]
+ return sorted([row[0] for row in results])
@classmethod
def _create_column_info(
@@ -1087,7 +1125,7 @@ class PrestoEngineSpec(PrestoBaseEngineSpec):
else f"SHOW PARTITIONS FROM {table_name}"
)
- sql = textwrap.dedent(
+ sql = dedent(
f"""\
{partition_select_clause}
{where_clause}
diff --git a/tests/integration_tests/db_engine_specs/hive_tests.py
b/tests/integration_tests/db_engine_specs/hive_tests.py
index ad80f8397f..991d3f759c 100644
--- a/tests/integration_tests/db_engine_specs/hive_tests.py
+++ b/tests/integration_tests/db_engine_specs/hive_tests.py
@@ -150,10 +150,6 @@ def test_hive_error_msg():
)
-def test_hive_get_view_names_return_empty_list(): # pylint:
disable=invalid-name
- assert HiveEngineSpec.get_view_names(mock.ANY, mock.ANY, mock.ANY) == []
-
-
def test_convert_dttm():
dttm = datetime.strptime("2019-01-02 03:04:05.678900", "%Y-%m-%d
%H:%M:%S.%f")
assert HiveEngineSpec.convert_dttm("DATE", dttm) == "CAST('2019-01-02' AS
DATE)"
diff --git a/tests/integration_tests/db_engine_specs/presto_tests.py
b/tests/integration_tests/db_engine_specs/presto_tests.py
index 2d6cf7b862..d37a04645f 100644
--- a/tests/integration_tests/db_engine_specs/presto_tests.py
+++ b/tests/integration_tests/db_engine_specs/presto_tests.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
from collections import namedtuple
+from textwrap import dedent
from unittest import mock, skipUnless
import pandas as pd
@@ -33,49 +34,47 @@ class TestPrestoDbEngineSpec(TestDbEngineSpec):
def test_get_datatype_presto(self):
self.assertEqual("STRING", PrestoEngineSpec.get_datatype("string"))
- def test_presto_get_view_names_return_empty_list(
- self,
- ): # pylint: disable=invalid-name
- self.assertEqual(
- [], PrestoEngineSpec.get_view_names(mock.ANY, mock.ANY, mock.ANY)
- )
-
- @mock.patch("superset.db_engine_specs.presto.is_feature_enabled")
- def test_get_view_names(self, mock_is_feature_enabled):
- mock_is_feature_enabled.return_value = True
- mock_execute = mock.MagicMock()
- mock_fetchall = mock.MagicMock(return_value=[["a", "b,", "c"], ["d",
"e"]])
+ def test_get_view_names_with_schema(self):
database = mock.MagicMock()
+ mock_execute = mock.MagicMock()
database.get_sqla_engine.return_value.raw_connection.return_value.cursor.return_value.execute
= (
mock_execute
)
-
database.get_sqla_engine.return_value.raw_connection.return_value.cursor.return_value.fetchall
= (
- mock_fetchall
+
database.get_sqla_engine.return_value.raw_connection.return_value.cursor.return_value.fetchall
= mock.MagicMock(
+ return_value=[["a", "b,", "c"], ["d", "e"]]
)
- result = PrestoEngineSpec.get_view_names(database, mock.Mock(), None)
+ schema = "schema"
+ result = PrestoEngineSpec.get_view_names(database, mock.Mock(), schema)
mock_execute.assert_called_once_with(
- "SELECT table_name FROM information_schema.views", {}
+ dedent(
+ """
+ SELECT table_name FROM information_schema.tables
+ WHERE table_schema = %(schema)s
+ AND table_type = 'VIEW'
+ """
+ ).strip(),
+ {"schema": schema},
)
assert result == ["a", "d"]
- @mock.patch("superset.db_engine_specs.presto.is_feature_enabled")
- def test_get_view_names_with_schema(self, mock_is_feature_enabled):
- mock_is_feature_enabled.return_value = True
- mock_execute = mock.MagicMock()
- mock_fetchall = mock.MagicMock(return_value=[["a", "b,", "c"], ["d",
"e"]])
+ def test_get_view_names_without_schema(self):
database = mock.MagicMock()
+ mock_execute = mock.MagicMock()
database.get_sqla_engine.return_value.raw_connection.return_value.cursor.return_value.execute
= (
mock_execute
)
-
database.get_sqla_engine.return_value.raw_connection.return_value.cursor.return_value.fetchall
= (
- mock_fetchall
+
database.get_sqla_engine.return_value.raw_connection.return_value.cursor.return_value.fetchall
= mock.MagicMock(
+ return_value=[["a", "b,", "c"], ["d", "e"]]
)
- schema = "schema"
- result = PrestoEngineSpec.get_view_names(database, mock.Mock(), schema)
+ result = PrestoEngineSpec.get_view_names(database, mock.Mock(), None)
mock_execute.assert_called_once_with(
- "SELECT table_name FROM information_schema.views "
- "WHERE table_schema=%(schema)s",
- {"schema": schema},
+ dedent(
+ """
+ SELECT table_name FROM information_schema.tables
+ WHERE table_type = 'VIEW'
+ """
+ ).strip(),
+ {},
)
assert result == ["a", "d"]
@@ -663,50 +662,17 @@ class TestPrestoDbEngineSpec(TestDbEngineSpec):
sqla_type = PrestoEngineSpec.get_sqla_column_type(None)
assert sqla_type is None
- @mock.patch(
-
"superset.utils.feature_flag_manager.FeatureFlagManager.is_feature_enabled"
- )
@mock.patch("superset.db_engine_specs.base.BaseEngineSpec.get_table_names")
@mock.patch("superset.db_engine_specs.presto.PrestoEngineSpec.get_view_names")
- def test_get_table_names_no_split_views_from_tables(
- self, mock_get_view_names, mock_get_table_names,
mock_is_feature_enabled
- ):
- mock_get_view_names.return_value = ["view1", "view2"]
- table_names = ["table1", "table2", "view1", "view2"]
- mock_get_table_names.return_value = table_names
- mock_is_feature_enabled.return_value = False
- tables = PrestoEngineSpec.get_table_names(mock.Mock(), mock.Mock(),
None)
- assert tables == table_names
-
- @mock.patch(
-
"superset.utils.feature_flag_manager.FeatureFlagManager.is_feature_enabled"
- )
- @mock.patch("superset.db_engine_specs.base.BaseEngineSpec.get_table_names")
-
@mock.patch("superset.db_engine_specs.presto.PrestoEngineSpec.get_view_names")
- def test_get_table_names_split_views_from_tables(
- self, mock_get_view_names, mock_get_table_names,
mock_is_feature_enabled
+ def test_get_table_names(
+ self,
+ mock_get_view_names,
+ mock_get_table_names,
):
mock_get_view_names.return_value = ["view1", "view2"]
- table_names = ["table1", "table2", "view1", "view2"]
- mock_get_table_names.return_value = table_names
- mock_is_feature_enabled.return_value = True
- tables = PrestoEngineSpec.get_table_names(mock.Mock(), mock.Mock(),
None)
- assert sorted(tables) == sorted(table_names)
-
- @mock.patch(
-
"superset.utils.feature_flag_manager.FeatureFlagManager.is_feature_enabled"
- )
- @mock.patch("superset.db_engine_specs.base.BaseEngineSpec.get_table_names")
-
@mock.patch("superset.db_engine_specs.presto.PrestoEngineSpec.get_view_names")
- def test_get_table_names_split_views_from_tables_no_tables(
- self, mock_get_view_names, mock_get_table_names,
mock_is_feature_enabled
- ):
- mock_get_view_names.return_value = []
- table_names = []
- mock_get_table_names.return_value = table_names
- mock_is_feature_enabled.return_value = True
+ mock_get_table_names.return_value = ["table1", "table2", "view1",
"view2"]
tables = PrestoEngineSpec.get_table_names(mock.Mock(), mock.Mock(),
None)
- assert tables == []
+ assert tables == ["table1", "table2"]
def test_get_full_name(self):
names = [