This is an automated email from the ASF dual-hosted git repository.
maximebeauchemin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-superset.git
The following commit(s) were added to refs/heads/master by this push:
new fc3b043 Add support for Apache Drill (#6610)
fc3b043 is described below
commit fc3b043462cf00e8415e8ece22f4d01b131b9cd7
Author: Charles S. Givre <[email protected]>
AuthorDate: Wed May 29 00:16:09 2019 -0400
Add support for Apache Drill (#6610)
* Add support for Apache Drill
* Updated Docs
* Removed Extraneous Functions
* Removed Extraneous Functions
* Final Mods
* Fixed Unit Test Error
* Fixed Epoch Conversion Functions
---
docs/installation.rst | 31 +++++++++++++++++++++++++++++++
superset/db_engine_specs.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 75 insertions(+)
diff --git a/docs/installation.rst b/docs/installation.rst
index 3e2934a..3405b8a 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -392,6 +392,12 @@ Here's a list of some of the recommended packages.
| Pinot | ``pip install pinotdb`` |
``pinot+http://controller:5436/`` |
| | |
``query?server=http://controller:5983/`` |
+---------------+-------------------------------------+-------------------------------------------------+
+| Apache Drill  |                                     | For the REST API:                               |
+|               |                                     | ``drill+sadrill://``                            |
+|               |                                     | For JDBC:                                       |
+|               |                                     | ``drill+jdbc://``                               |
++---------------+-------------------------------------+-------------------------------------------------+
+
Note that many other databases are supported, the main criteria being the
existence of a functional SqlAlchemy dialect and Python driver. Googling
@@ -449,6 +455,31 @@ Required environment variables: ::
See `Teradata SQLAlchemy <https://github.com/Teradata/sqlalchemy-teradata>`_.
+Apache Drill
+------------
+At the time of writing, the SQLAlchemy dialect for Drill is not available on
+PyPI and must be downloaded from GitHub:
+`SQLAlchemy Drill <https://github.com/JohnOmernik/sqlalchemy-drill>`_
+
+Alternatively, you can install it completely from the command line as follows:
::
+
+ git clone https://github.com/JohnOmernik/sqlalchemy-drill
+ cd sqlalchemy-drill
+ python3 setup.py install
+
+Once that is done, you can connect to Drill in two ways, either via the REST
interface or by JDBC. If you are connecting via JDBC, you must have the
+Drill JDBC Driver installed.
+
+The basic connection string for Drill (REST interface) looks like this: ::
+
+    drill+sadrill://{username}:{password}@{host}:{port}/{storage_plugin}?use_ssl=True
+
+If you are using JDBC to connect to Drill, the connection string looks like
this: ::
+
+ drill+jdbc://{username}:{password}@{host}:{port}/{storage_plugin}
+
+For a complete walkthrough of using Apache Drill with Superset, see
+`Visualize Anything with Superset and Drill
+<http://thedataist.com/visualize-anything-with-superset-and-drill/>`_.
+
Caching
-------
diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py
index 67aba12..04efef7 100644
--- a/superset/db_engine_specs.py
+++ b/superset/db_engine_specs.py
@@ -724,6 +724,50 @@ class SqliteEngineSpec(BaseEngineSpec):
return sorted(inspector.get_table_names())
+class DrillEngineSpec(BaseEngineSpec):
+    """Engine spec for Apache Drill.
+
+    Supplies the SQL snippets used for Drill: time-grain expressions,
+    epoch-to-date templates, datetime literals and schema selection.
+    """
+    engine = 'drill'
+
+    # Maps a time grain (ISO 8601 duration string) to the SQL expression
+    # applied to ``{col}`` for that grain; ``None`` leaves the column as is.
+    time_grain_functions = {
+        None: '{col}',
+        'PT1S': "nearestDate({col}, 'SECOND')",
+        'PT1M': "nearestDate({col}, 'MINUTE')",
+        'PT15M': "nearestDate({col}, 'QUARTER_HOUR')",
+        'PT0.5H': "nearestDate({col}, 'HALF_HOUR')",
+        'PT1H': "nearestDate({col}, 'HOUR')",
+        'P1D': 'TO_DATE({col})',
+        'P1W': "nearestDate({col}, 'WEEK_SUNDAY')",
+        'P1M': "nearestDate({col}, 'MONTH')",
+        'P0.25Y': "nearestDate({col}, 'QUARTER')",
+        'P1Y': "nearestDate({col}, 'YEAR')",
+    }
+
+    @classmethod
+    def epoch_to_dttm(cls):
+        """Return a SQL template converting epoch seconds in ``{col}``.
+
+        The column is scaled by 1000 and fed to the millisecond template
+        from ``epoch_ms_to_dttm`` so both conversions stay in sync.
+        """
+        return cls.epoch_ms_to_dttm().replace('{col}', '({col}*1000)')
+
+    @classmethod
+    def epoch_ms_to_dttm(cls):
+        """Return a SQL template converting epoch milliseconds in ``{col}``."""
+        return 'TO_DATE({col})'
+
+    @classmethod
+    def convert_dttm(cls, target_type, dttm):
+        """Render ``dttm`` as a SQL literal suited to ``target_type``.
+
+        ``DATE`` and ``TIMESTAMP`` (matched case-insensitively) produce a
+        ``CAST`` expression; any other type gets a plain quoted
+        ``YYYY-MM-DD HH:MM:SS`` string.
+        """
+        tt = target_type.upper()
+        if tt == 'DATE':
+            # Only the date part of the ISO representation is kept.
+            return "CAST('{}' AS DATE)".format(dttm.isoformat()[:10])
+        if tt == 'TIMESTAMP':
+            return "CAST('{}' AS TIMESTAMP)".format(
+                dttm.strftime('%Y-%m-%d %H:%M:%S'))
+        return "'{}'".format(dttm.strftime('%Y-%m-%d %H:%M:%S'))
+
+    @classmethod
+    def adjust_database_uri(cls, uri, selected_schema):
+        """Point ``uri`` at ``selected_schema`` when one is given.
+
+        The schema is quoted via ``parse.quote`` with no safe characters
+        and stored as ``uri.database``. ``uri`` is modified in place and
+        returned; it is returned untouched when no schema is selected.
+        """
+        if selected_schema:
+            uri.database = parse.quote(selected_schema, safe='')
+        return uri
+
+
class MySQLEngineSpec(BaseEngineSpec):
engine = 'mysql'
max_column_name_length = 64