This is an automated email from the ASF dual-hosted git repository.
maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new f742b98 Making thrift, pyhive and tableschema as extra_requires (#6696)
f742b98 is described below
commit f742b9876bc2787b7b2228fa3ce9a515da92f275
Author: Maxime Beauchemin <[email protected]>
AuthorDate: Sat Jan 19 14:27:18 2019 -0800
Making thrift, pyhive and tableschema as extra_requires (#6696)
* Making thrift, pyhive and tableschema as extra_requires
Looking at the dependency tree for license-related questions, I noticed
that tableschema pulls in a huge dependency tree, and only people running
Hive really need it. This makes tableschema, as well as pyhive and thrift,
optional. Also bumps some Python dependencies.
* Run pip-compile
* Removing refs to past.builtins (from future lib)
* Add thrift
---
UPDATING.md | 6 ++++++
requirements-dev.txt | 2 ++
requirements.txt | 34 +++-------------------------------
setup.py | 14 ++++++++------
superset/connectors/base/models.py | 3 +--
superset/connectors/sqla/views.py | 3 +--
superset/dataframe.py | 5 ++---
superset/db_engine_specs.py | 8 ++++----
superset/db_engines/hive.py | 9 +++++----
superset/utils/core.py | 10 ++++------
superset/viz.py | 5 ++---
tests/celery_tests.py | 4 +---
12 files changed, 39 insertions(+), 64 deletions(-)
diff --git a/UPDATING.md b/UPDATING.md
index 97ed710..03ae1da 100644
--- a/UPDATING.md
+++ b/UPDATING.md
@@ -3,6 +3,12 @@
This file documents any backwards-incompatible changes in Superset and
assists people when migrating to a new version.
+## Superset 0.32.0
+* If you use `Hive` or `Presto`, some dependencies that used to be part of
+ the main package are now optional. To install them,
+ run `pip install superset[presto]` and/or `pip install superset[hive]` as
+ required.
+
## Superset 0.31.0
* boto3 / botocore was removed from the dependency list. If you use s3
as a place to store your SQL Lab result set or Hive uploads, you may
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 0ac1c57..dc564fb 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -9,8 +9,10 @@ mysqlclient==1.3.13
pip-tools==3.1.0
psycopg2-binary==2.7.5
pycodestyle==2.4.0
+pyhive==0.6.1
pylint==1.9.2
python-dotenv==0.10.1
redis==2.10.6
statsd==3.3.0
+thrift==0.11.0
tox==3.5.3
diff --git a/requirements.txt b/requirements.txt
index c503e27..9767f28 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,11 +7,9 @@
alembic==1.0.0 # via flask-migrate
amqp==2.3.2 # via kombu
asn1crypto==0.24.0 # via cryptography
-babel==2.6.0 # via flask-babel, flower
+babel==2.6.0 # via flask-babel
billiard==3.5.0.4 # via celery
bleach==3.0.2
-cachetools==3.0.0 # via google-auth
-cchardet==1.0.0 # via tabulator
celery==4.2.0
certifi==2018.8.24 # via requests
cffi==1.11.5 # via cryptography
@@ -23,7 +21,6 @@ croniter==0.3.26
cryptography==2.4.2
decorator==4.3.0 # via retry
defusedxml==0.5.0 # via python3-openid
-et-xmlfile==1.0.1 # via openpyxl
flask-appbuilder==1.12.1
flask-babel==0.11.1 # via flask-appbuilder
flask-caching==1.4.0
@@ -34,67 +31,42 @@ flask-openid==1.2.5 # via flask-appbuilder
flask-sqlalchemy==2.3.2 # via flask-appbuilder, flask-migrate
flask-wtf==0.14.2
flask==1.0.2
-flower==0.9.2
-future==0.16.0 # via pyhive
geopy==1.11.0
-google-auth==1.6.1 # via gsheetsdb
-gsheetsdb==0.1.9
gunicorn==19.8.0
humanize==0.5.1
idna==2.6
-ijson==2.3 # via tabulator
isodate==0.6.0
itsdangerous==0.24 # via flask
-jdcal==1.4 # via openpyxl
jinja2==2.10 # via flask, flask-babel
-jsonlines==1.2.0 # via tabulator
-jsonschema==2.6.0 # via tableschema
kombu==4.2.1 # via celery
-linear-tsv==1.1.0 # via tabulator
mako==1.0.7 # via alembic
markdown==3.0
markupsafe==1.0 # via jinja2, mako
-mo-future==2.20.18317 # via moz-sql-parser
-moz-sql-parser==2.19.18318 # via gsheetsdb
numpy==1.15.2 # via pandas
-openpyxl==2.4.11 # via tabulator
pandas==0.23.1
parsedatetime==2.0.0
pathlib2==2.3.0
polyline==1.3.2
py==1.7.0 # via retry
-pyasn1-modules==0.2.2 # via google-auth
-pyasn1==0.4.4 # via pyasn1-modules, rsa
pycparser==2.19 # via cffi
pydruid==0.5.0
-pyhive==0.5.1
-pyparsing==2.3.0 # via moz-sql-parser
python-dateutil==2.6.1
python-editor==1.0.3 # via alembic
python-geohash==0.8.5
python3-openid==3.1.0 # via flask-openid
-pytz==2018.5 # via babel, celery, flower, pandas
+pytz==2018.5 # via babel, celery, pandas
pyyaml==3.13
requests==2.20.0
retry==0.9.2
-rfc3986==1.1.0 # via tableschema
-rsa==4.0 # via google-auth
-sasl==0.2.1 # via thrift-sasl
selenium==3.141.0
simplejson==3.15.0
-six==1.11.0 # via bleach, cryptography, google-auth, gsheetsdb, isodate, jsonlines, linear-tsv, pathlib2, polyline, pydruid, python-dateutil, sasl, sqlalchemy-utils, tableschema, tabulator, thrift
+six==1.11.0 # via bleach, cryptography, isodate, pathlib2, polyline, pydruid, python-dateutil, sqlalchemy-utils
sqlalchemy-utils==0.32.21
sqlalchemy==1.2.2
sqlparse==0.2.4
-tableschema==1.1.0
-tabulator==1.15.0 # via tableschema
-thrift-sasl==0.3.0
-thrift==0.11.0
-tornado==5.1.1 # via flower
unicodecsv==0.14.1
urllib3==1.22 # via requests, selenium
vine==1.1.4 # via amqp
webencodings==0.5.1 # via bleach
werkzeug==0.14.1 # via flask
wtforms==2.2.1 # via flask-wtf
-xlrd==1.1.0 # via tabulator
diff --git a/setup.py b/setup.py
index 4b05139..01e4d3d 100644
--- a/setup.py
+++ b/setup.py
@@ -82,9 +82,7 @@ setup(
'flask-compress',
'flask-migrate',
'flask-wtf',
- 'flower', # deprecated
'geopy',
- 'gsheetsdb>=0.1.9',
'gunicorn', # deprecated
'humanize',
'idna',
@@ -95,7 +93,6 @@ setup(
'pathlib2',
'polyline',
'pydruid>=0.4.3',
- 'pyhive>=0.4.0',
'python-dateutil',
'python-geohash',
'pyyaml>=3.13',
@@ -106,14 +103,19 @@ setup(
'sqlalchemy',
'sqlalchemy-utils',
'sqlparse',
- 'tableschema',
- 'thrift>=0.9.3',
- 'thrift-sasl>=0.2.1',
'unicodecsv',
],
extras_require={
'cors': ['flask-cors>=2.0.0'],
'console_log': ['console_log==0.2.10'],
+ 'hive': [
+ 'pyhive>=0.4.0',
+ 'tableschema',
+ 'thrift-sasl>=0.2.1',
+ 'thrift>=0.9.3',
+ ],
+ 'presto': ['pyhive>=0.4.0'],
+ 'gsheets': ['gsheetsdb>=0.1.9'],
},
author='Apache Software Foundation',
author_email='[email protected]',
diff --git a/superset/connectors/base/models.py b/superset/connectors/base/models.py
index 50ef6d8..39cc585 100644
--- a/superset/connectors/base/models.py
+++ b/superset/connectors/base/models.py
@@ -17,7 +17,6 @@
# pylint: disable=C,R,W
import json
-from past.builtins import basestring
from sqlalchemy import (
and_, Boolean, Column, Integer, String, Text,
)
@@ -218,7 +217,7 @@ class BaseDatasource(AuditMixinNullable, ImportMixin):
values, target_column_is_numeric=False, is_list_target=False):
def handle_single_value(v):
# backward compatibility with previous <select> components
- if isinstance(v, basestring):
+ if isinstance(v, str):
v = v.strip('\t\n \'"')
if target_column_is_numeric:
# For backwards compatibility and edge cases
diff --git a/superset/connectors/sqla/views.py b/superset/connectors/sqla/views.py
index 2a14fa3..212d551 100644
--- a/superset/connectors/sqla/views.py
+++ b/superset/connectors/sqla/views.py
@@ -23,7 +23,6 @@ from flask_appbuilder.models.sqla.interface import SQLAInterface
from flask_appbuilder.security.decorators import has_access
from flask_babel import gettext as __
from flask_babel import lazy_gettext as _
-from past.builtins import basestring
from superset import appbuilder, db, security_manager
from superset.connectors.base.views import DatasourceModelView
@@ -301,7 +300,7 @@ class TableModelView(DatasourceModelView, DeleteMixin, YamlExportMixin): # noqa
def edit(self, pk):
"""Simple hack to redirect to explore view after saving"""
resp = super(TableModelView, self).edit(pk)
- if isinstance(resp, basestring):
+ if isinstance(resp, str):
return resp
return redirect('/superset/explore/table/{}/'.format(pk))
diff --git a/superset/dataframe.py b/superset/dataframe.py
index 69fcc53..5cd8ba9 100644
--- a/superset/dataframe.py
+++ b/superset/dataframe.py
@@ -29,7 +29,6 @@ import numpy as np
import pandas as pd
from pandas.core.common import _maybe_box_datetimelike
from pandas.core.dtypes.dtypes import ExtensionDtype
-from past.builtins import basestring
from superset.utils.core import JS_MAX_INTEGER
@@ -144,7 +143,7 @@ class SupersetDataFrame(object):
def is_date(np_dtype, db_type_str):
def looks_daty(s):
- if isinstance(s, basestring):
+ if isinstance(s, str):
return any([s.lower().startswith(ss) for ss in ('time', 'date')])
return False
@@ -203,7 +202,7 @@ class SupersetDataFrame(object):
if not db_type_str or db_type_str.upper() == 'OBJECT':
v = sample[col].iloc[0] if not sample[col].empty else None
- if isinstance(v, basestring):
+ if isinstance(v, str):
column['type'] = 'STRING'
elif isinstance(v, int):
column['type'] = 'INT'
diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index 00643c0..f724781 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -40,7 +40,6 @@ import time
from flask import g
from flask_babel import lazy_gettext as _
import pandas
-from past.builtins import basestring
import sqlalchemy as sqla
from sqlalchemy import Column, select
from sqlalchemy.engine import create_engine
@@ -48,7 +47,6 @@ from sqlalchemy.engine.url import make_url
from sqlalchemy.sql import quoted_name, text
from sqlalchemy.sql.expression import TextAsFrom
import sqlparse
-from tableschema import Table
from werkzeug.utils import secure_filename
from superset import app, conf, db, sql_parse
@@ -143,7 +141,7 @@ class BaseEngineSpec(object):
@classmethod
def get_datatype(cls, type_code):
- if isinstance(type_code, basestring) and len(type_code):
+ if isinstance(type_code, str) and len(type_code):
return type_code.upper()
@classmethod
@@ -709,7 +707,7 @@ class MySQLEngineSpec(BaseEngineSpec):
datatype = type_code
if isinstance(type_code, int):
datatype = cls.type_code_map.get(type_code)
- if datatype and isinstance(datatype, basestring) and len(datatype):
+ if datatype and isinstance(datatype, str) and len(datatype):
return datatype
@classmethod
@@ -1123,6 +1121,8 @@ class HiveEngineSpec(PrestoEngineSpec):
upload_path = config['UPLOAD_FOLDER'] + \
secure_filename(filename)
+ # Optional dependency
+ from tableschema import Table # pylint: disable=import-error
hive_table_schema = Table(upload_path).infer()
column_name_and_type = []
for column_info in hive_table_schema['fields']:
diff --git a/superset/db_engines/hive.py b/superset/db_engines/hive.py
index f0f8995..6334257 100644
--- a/superset/db_engines/hive.py
+++ b/superset/db_engines/hive.py
@@ -15,14 +15,11 @@
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
-from pyhive import hive # pylint: disable=no-name-in-module
-from TCLIService import ttypes
-from thrift import Thrift
# TODO: contribute back to pyhive.
def fetch_logs(self, max_rows=1024,
- orientation=ttypes.TFetchOrientation.FETCH_NEXT):
+ orientation=None):
"""Mocked. Retrieve the logs produced by the execution of the query.
Can be called multiple times to fetch the logs produced after
the previous call.
@@ -31,6 +28,10 @@ def fetch_logs(self, max_rows=1024,
.. note::
This is not a part of DB-API.
"""
+ from pyhive import hive
+ from TCLIService import ttypes
+ from thrift import Thrift
+ orientation = orientation or ttypes.TFetchOrientation.FETCH_NEXT
try:
req = ttypes.TGetLogReq(operationHandle=self._operationHandle)
logs = self._connection.client.GetLog(req).log
diff --git a/superset/utils/core.py b/superset/utils/core.py
index 2fb3bd6..3e38ea5 100644
--- a/superset/utils/core.py
+++ b/superset/utils/core.py
@@ -16,7 +16,6 @@
# under the License.
# pylint: disable=C,R,W
"""Utility functions used across Superset"""
-from builtins import object
from datetime import date, datetime, time, timedelta
import decimal
from email.mime.application import MIMEApplication
@@ -48,7 +47,6 @@ import markdown as md
import numpy
import pandas as pd
import parsedatetime
-from past.builtins import basestring
from pydruid.utils.having import Having
import sqlalchemy as sa
from sqlalchemy import event, exc, select, Text
@@ -88,7 +86,7 @@ def flasher(msg, severity=None):
logging.info(msg)
-class _memoized(object): # noqa
+class _memoized: # noqa
"""Decorator that caches a function's return value each time it is called
If called later with the same arguments, the cached value is returned, and
@@ -503,7 +501,7 @@ def table_has_constraint(table, name, db):
return False
-class timeout(object):
+class timeout:
"""
To be used in a ``with`` block and timeout its content.
"""
@@ -569,7 +567,7 @@ def pessimistic_connection_handling(some_engine):
connection.should_close_with_result = save_should_close_with_result
-class QueryStatus(object):
+class QueryStatus:
"""Enum-type class for query statuses"""
STOPPED = 'stopped'
@@ -678,7 +676,7 @@ def send_MIME_email(e_from, e_to, mime_msg, config, dryrun=False):
def get_email_address_list(address_string):
- if isinstance(address_string, basestring):
+ if isinstance(address_string, str):
if ',' in address_string:
address_string = address_string.split(',')
elif '\n' in address_string:
diff --git a/superset/viz.py b/superset/viz.py
index 3bdeb79..a548bc0 100644
--- a/superset/viz.py
+++ b/superset/viz.py
@@ -43,7 +43,6 @@ from markdown import markdown
import numpy as np
import pandas as pd
from pandas.tseries.frequencies import to_offset
-from past.builtins import basestring
import polyline
import simplejson as json
@@ -1612,8 +1611,8 @@ class SankeyViz(BaseViz):
def get_data(self, df):
df.columns = ['source', 'target', 'value']
- df['source'] = df['source'].astype(basestring)
- df['target'] = df['target'].astype(basestring)
+ df['source'] = df['source'].astype(str)
+ df['target'] = df['target'].astype(str)
recs = df.to_dict(orient='records')
hierarchy = defaultdict(set)
diff --git a/tests/celery_tests.py b/tests/celery_tests.py
index dbbdf08..80b4101 100644
--- a/tests/celery_tests.py
+++ b/tests/celery_tests.py
@@ -20,8 +20,6 @@ import subprocess
import time
import unittest
-from past.builtins import basestring
-
from superset import app, db
from superset.models.helpers import QueryStatus
from superset.models.sql_lab import Query
@@ -239,7 +237,7 @@ class CeleryTestCase(SupersetTestCase):
@staticmethod
def de_unicode_dict(d):
def str_if_basestring(o):
- if isinstance(o, basestring):
+ if isinstance(o, str):
return str(o)
return o
return {str_if_basestring(k): str_if_basestring(d[k]) for k in d}