This is an automated email from the ASF dual-hosted git repository.

rusackas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/superset.git


The following commit(s) were added to refs/heads/master by this push:
     new 97121465dd feat: Add Apache Doris support (#24714)
97121465dd is described below

commit 97121465ddf772013604ffdb5d7378885bc6ee26
Author: Jiwen liu <[email protected]>
AuthorDate: Wed Nov 22 03:42:10 2023 +0800

    feat: Add Apache Doris support (#24714)
    
    Co-authored-by: Evan Rusackas <[email protected]>
---
 README.md                                          |   1 +
 docs/docs/databases/doris.mdx                      |  26 ++
 .../docs/databases/installing-database-drivers.mdx |   1 +
 docs/src/resources/data.js                         |   5 +
 docs/static/img/databases/doris.png                | Bin 0 -> 11539 bytes
 setup.py                                           |   1 +
 superset-frontend/src/assets/images/doris.png      | Bin 0 -> 11539 bytes
 superset/db_engine_specs/doris.py                  | 278 +++++++++++++++++++++
 tests/unit_tests/db_engine_specs/test_doris.py     | 147 +++++++++++
 9 files changed, 459 insertions(+)

diff --git a/README.md b/README.md
index 757c0fb503..3588d99419 100644
--- a/README.md
+++ b/README.md
@@ -130,6 +130,7 @@ Here are some of the major database solutions that are 
supported:
   <img src="superset-frontend/src/assets/images/yugabyte.png" alt="yugabyte" 
border="0" width="200" height="80"/>
   <img src="superset-frontend/src/assets/images/databend.png" alt="databend" 
border="0" width="200" height="80"/>
   <img src="superset-frontend/src/assets/images/starrocks.png" alt="starrocks" 
border="0" width="200" height="80"/>
+  <img src="superset-frontend/src/assets/images/doris.png" alt="doris" border="0" width="200" height="80"/>
 </p>
 
 **A more comprehensive list of supported databases** along with the 
configuration instructions can be found 
[here](https://superset.apache.org/docs/databases/installing-database-drivers).
diff --git a/docs/docs/databases/doris.mdx b/docs/docs/databases/doris.mdx
new file mode 100644
index 0000000000..62c16afeb3
--- /dev/null
+++ b/docs/docs/databases/doris.mdx
@@ -0,0 +1,26 @@
+---
+title: Apache Doris
+hide_title: true
+sidebar_position: 5
+version: 1
+---
+
+## Doris
+
+The [pydoris](https://pypi.org/project/pydoris/) library is the recommended way to connect to Apache Doris through SQLAlchemy.
+
+You'll need the following setting values to form the connection string:
+
+- **User**: User Name
+- **Password**: Password
+- **Host**: Doris FE Host
+- **Port**: Doris FE port
+- **Catalog**: Catalog Name
+- **Database**: Database Name
+
+
+Here's what the connection string looks like:
+
+```
+doris://<User>:<Password>@<Host>:<Port>/<Catalog>.<Database>
+```
diff --git a/docs/docs/databases/installing-database-drivers.mdx b/docs/docs/databases/installing-database-drivers.mdx
index f698b7ab8e..f11b4ec5eb 100644
--- a/docs/docs/databases/installing-database-drivers.mdx
+++ b/docs/docs/databases/installing-database-drivers.mdx
@@ -25,6 +25,7 @@ Some of the recommended packages are shown below. Please 
refer to [setup.py](htt
 | Database                                                  | PyPI package     
                                                                  | Connection 
String                                                                          
                                                            |
 | --------------------------------------------------------- | 
----------------------------------------------------------------------------------
 | 
------------------------------------------------------------------------------------------------------------------------------------------------------
 |
 | [Amazon Athena](/docs/databases/athena)                   | `pip install 
pyathena[pandas]` , `pip install PyAthenaJDBC`                        | 
`awsathena+rest://{aws_access_key_id}:{aws_secret_access_key}@athena.{region_name}.amazonaws.com/{schema_name}?s3_staging_dir={s3_staging_dir}&...
 `   |
+| [Apache Doris](/docs/databases/doris)                     | `pip install 
pydoris`                                                              | 
`doris://<User>:<Password>@<Host>:<Port>/<Catalog>.<Database>`                  
                          |
 | [Amazon DynamoDB](/docs/databases/dynamodb)               | `pip install 
pydynamodb`                                                           | 
`dynamodb://{access_key_id}:{secret_access_key}@dynamodb.{region_name}.amazonaws.com?connector=superset`
                                               |
 | [Amazon Redshift](/docs/databases/redshift)               | `pip install 
sqlalchemy-redshift`                                                  | ` 
redshift+psycopg2://<userName>:<DBPassword>@<AWS End Point>:5439/<Database 
Name>`                                                                    |
 | [Apache Drill](/docs/databases/drill)                     | `pip install 
sqlalchemy-drill`                                                     | 
`drill+sadrill:// For JDBC drill+jdbc://`                                       
                                                                       |
diff --git a/docs/src/resources/data.js b/docs/src/resources/data.js
index a07be55267..42cf835a49 100644
--- a/docs/src/resources/data.js
+++ b/docs/src/resources/data.js
@@ -117,4 +117,9 @@ export const Databases = [
     href: 'https://www.microsoft.com/en-us/sql-server',
     imgName: 'msql.png',
   },
+  {
+    title: 'Apache Doris',
+    href: 'https://doris.apache.org/',
+    imgName: 'doris.png',
+  },
 ];
diff --git a/docs/static/img/databases/doris.png b/docs/static/img/databases/doris.png
new file mode 100644
index 0000000000..4d88f2a36c
Binary files /dev/null and b/docs/static/img/databases/doris.png differ
diff --git a/setup.py b/setup.py
index e4d437b4d1..29df567e04 100644
--- a/setup.py
+++ b/setup.py
@@ -205,6 +205,7 @@ setup(
         "vertica": ["sqlalchemy-vertica-python>=0.5.9, < 0.6"],
         "netezza": ["nzalchemy>=11.0.2"],
         "starrocks": ["starrocks>=1.0.0"],
+        "doris": ["pydoris>=1.0.0, <2.0.0"],
     },
     python_requires="~=3.9",
     author="Apache Software Foundation",
diff --git a/superset-frontend/src/assets/images/doris.png b/superset-frontend/src/assets/images/doris.png
new file mode 100644
index 0000000000..4d88f2a36c
Binary files /dev/null and b/superset-frontend/src/assets/images/doris.png differ
diff --git a/superset/db_engine_specs/doris.py b/superset/db_engine_specs/doris.py
new file mode 100644
index 0000000000..e502f5bda2
--- /dev/null
+++ b/superset/db_engine_specs/doris.py
@@ -0,0 +1,278 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import logging
+import re
+from re import Pattern
+from typing import Any, Optional
+from urllib import parse
+
+from flask_babel import gettext as __
+from sqlalchemy import Float, Integer, Numeric, String, TEXT, types
+from sqlalchemy.engine.url import URL
+from sqlalchemy.sql.type_api import TypeEngine
+
+from superset.db_engine_specs.mysql import MySQLEngineSpec
+from superset.errors import SupersetErrorType
+from superset.utils.core import GenericDataType
+
+# Regular expressions to catch custom errors
+CONNECTION_ACCESS_DENIED_REGEX = re.compile(
+    "Access denied for user '(?P<username>.*?)'"
+)
+CONNECTION_INVALID_HOSTNAME_REGEX = re.compile(
+    "Unknown Doris server host '(?P<hostname>.*?)'"
+)
+CONNECTION_UNKNOWN_DATABASE_REGEX = re.compile("Unknown database '(?P<database>.*?)'")
+CONNECTION_HOST_DOWN_REGEX = re.compile(
+    "Can't connect to Doris server on '(?P<hostname>.*?)'"
+)
+SYNTAX_ERROR_REGEX = re.compile(
+    "check the manual that corresponds to your MySQL server "
+    "version for the right syntax to use near '(?P<server_error>.*)"
+)
+
+logger = logging.getLogger(__name__)
+
+
+class TINYINT(Integer):
+    __visit_name__ = "TINYINT"
+
+
+class LARGEINT(Integer):
+    __visit_name__ = "LARGEINT"
+
+
+class DOUBLE(Float):
+    __visit_name__ = "DOUBLE"
+
+
+class HLL(Numeric):
+    __visit_name__ = "HLL"
+
+
+class BITMAP(Numeric):
+    __visit_name__ = "BITMAP"
+
+
+class QuantileState(Numeric):
+    __visit_name__ = "QUANTILE_STATE"
+
+
+class AggState(Numeric):
+    __visit_name__ = "AGG_STATE"
+
+
+class ARRAY(TypeEngine):
+    __visit_name__ = "ARRAY"
+
+    @property
+    def python_type(self) -> Optional[type[list[Any]]]:
+        return list
+
+
+class MAP(TypeEngine):
+    __visit_name__ = "MAP"
+
+    @property
+    def python_type(self) -> Optional[type[dict[Any, Any]]]:
+        return dict
+
+
+class STRUCT(TypeEngine):
+    __visit_name__ = "STRUCT"
+
+    @property
+    def python_type(self) -> Optional[type[Any]]:
+        return None
+
+
+class DorisEngineSpec(MySQLEngineSpec):
+    engine = "pydoris"
+    engine_aliases = {"doris"}
+    engine_name = "Apache Doris"
+    max_column_name_length = 64
+    default_driver = "pydoris"
+    sqlalchemy_uri_placeholder = (
+        "doris://user:password@host:port/catalog.db[?key=value&key=value...]"
+    )
+    encryption_parameters = {"ssl": "0"}
+    supports_dynamic_schema = True
+
+    column_type_mappings = (  # type: ignore
+        (
+            re.compile(r"^tinyint", re.IGNORECASE),
+            TINYINT(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^largeint", re.IGNORECASE),
+            LARGEINT(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^decimal.*", re.IGNORECASE),
+            types.DECIMAL(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^double", re.IGNORECASE),
+            DOUBLE(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^varchar(\((\d+)\))*$", re.IGNORECASE),
+            types.VARCHAR(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^char(\((\d+)\))*$", re.IGNORECASE),
+            types.CHAR(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^json.*", re.IGNORECASE),
+            types.JSON(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^binary.*", re.IGNORECASE),
+            types.BINARY(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^quantile_state", re.IGNORECASE),
+            QuantileState(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^agg_state.*", re.IGNORECASE),
+            AggState(),
+            GenericDataType.STRING,
+        ),
+        (re.compile(r"^hll", re.IGNORECASE), HLL(), GenericDataType.STRING),
+        (
+            re.compile(r"^bitmap", re.IGNORECASE),
+            BITMAP(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^array.*", re.IGNORECASE),
+            ARRAY(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^map.*", re.IGNORECASE),
+            MAP(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^struct.*", re.IGNORECASE),
+            STRUCT(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^datetime.*", re.IGNORECASE),
+            types.DATETIME(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^date.*", re.IGNORECASE),
+            types.DATE(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^text.*", re.IGNORECASE),
+            TEXT(),
+            GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^string.*", re.IGNORECASE),
+            String(),
+            GenericDataType.STRING,
+        ),
+    )
+
+    custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
+        CONNECTION_ACCESS_DENIED_REGEX: (
+            __('Either the username "%(username)s" or the password is incorrect.'),
+            SupersetErrorType.CONNECTION_ACCESS_DENIED_ERROR,
+            {"invalid": ["username", "password"]},
+        ),
+        CONNECTION_INVALID_HOSTNAME_REGEX: (
+            __('Unknown Doris server host "%(hostname)s".'),
+            SupersetErrorType.CONNECTION_INVALID_HOSTNAME_ERROR,
+            {"invalid": ["host"]},
+        ),
+        CONNECTION_HOST_DOWN_REGEX: (
+            __('The host "%(hostname)s" might be down and can\'t be reached.'),
+            SupersetErrorType.CONNECTION_HOST_DOWN_ERROR,
+            {"invalid": ["host", "port"]},
+        ),
+        CONNECTION_UNKNOWN_DATABASE_REGEX: (
+            __('Unable to connect to database "%(database)s".'),
+            SupersetErrorType.CONNECTION_UNKNOWN_DATABASE_ERROR,
+            {"invalid": ["database"]},
+        ),
+        SYNTAX_ERROR_REGEX: (
+            __(
+                'Please check your query for syntax errors near "%(server_error)s". '
+                "Then, try running your query again."
+            ),
+            SupersetErrorType.SYNTAX_ERROR,
+            {},
+        ),
+    }
+
+    @classmethod
+    def adjust_engine_params(
+        cls,
+        uri: URL,
+        connect_args: dict[str, Any],
+        catalog: Optional[str] = None,
+        schema: Optional[str] = None,
+    ) -> tuple[URL, dict[str, Any]]:
+        database = uri.database
+        if schema and database:
+            schema = parse.quote(schema, safe="")
+            if "." in database:
+                database = database.split(".")[0] + "." + schema
+            else:
+                database = "internal." + schema
+            uri = uri.set(database=database)
+
+        return uri, connect_args
+
+    @classmethod
+    def get_schema_from_engine_params(
+        cls,
+        sqlalchemy_uri: URL,
+        connect_args: dict[str, Any],
+    ) -> Optional[str]:
+        """
+        Return the configured schema.
+
+        For doris the SQLAlchemy URI looks like this:
+
+            doris://localhost:9030/catalog.database
+
+        """
+        database = sqlalchemy_uri.database.strip("/")
+
+        if "." not in database:
+            return None
+
+        return parse.unquote(database.split(".")[1])
diff --git a/tests/unit_tests/db_engine_specs/test_doris.py b/tests/unit_tests/db_engine_specs/test_doris.py
new file mode 100644
index 0000000000..d7444f8d2d
--- /dev/null
+++ b/tests/unit_tests/db_engine_specs/test_doris.py
@@ -0,0 +1,147 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Any, Optional
+
+import pytest
+from sqlalchemy import JSON, types
+from sqlalchemy.engine.url import make_url
+
+from superset.db_engine_specs.doris import (
+    AggState,
+    ARRAY,
+    BITMAP,
+    DOUBLE,
+    HLL,
+    LARGEINT,
+    MAP,
+    QuantileState,
+    STRUCT,
+    TINYINT,
+)
+from superset.utils.core import GenericDataType
+from tests.unit_tests.db_engine_specs.utils import assert_column_spec
+
+
[email protected](
+    "native_type,sqla_type,attrs,generic_type,is_dttm",
+    [
+        # Numeric
+        ("tinyint", TINYINT, None, GenericDataType.NUMERIC, False),
+        ("largeint", LARGEINT, None, GenericDataType.NUMERIC, False),
+        ("decimal(38,18)", types.DECIMAL, None, GenericDataType.NUMERIC, False),
+        ("decimalv3(38,18)", types.DECIMAL, None, GenericDataType.NUMERIC, False),
+        ("double", DOUBLE, None, GenericDataType.NUMERIC, False),
+        # String
+        ("char(10)", types.CHAR, None, GenericDataType.STRING, False),
+        ("varchar(65533)", types.VARCHAR, None, GenericDataType.STRING, False),
+        ("binary", types.BINARY, None, GenericDataType.STRING, False),
+        ("text", types.TEXT, None, GenericDataType.STRING, False),
+        ("string", types.String, None, GenericDataType.STRING, False),
+        # Date
+        ("datetimev2", types.DateTime, None, GenericDataType.STRING, False),
+        ("datev2", types.Date, None, GenericDataType.STRING, False),
+        # Complex type
+        ("array<varchar(65533)>", ARRAY, None, GenericDataType.STRING, False),
+        ("map<string,int>", MAP, None, GenericDataType.STRING, False),
+        ("struct<int,string>", STRUCT, None, GenericDataType.STRING, False),
+        ("json", JSON, None, GenericDataType.STRING, False),
+        ("jsonb", JSON, None, GenericDataType.STRING, False),
+        ("bitmap", BITMAP, None, GenericDataType.STRING, False),
+        ("hll", HLL, None, GenericDataType.STRING, False),
+        ("quantile_state", QuantileState, None, GenericDataType.STRING, False),
+        ("agg_state", AggState, None, GenericDataType.STRING, False),
+    ],
+)
+def test_get_column_spec(
+    native_type: str,
+    sqla_type: type[types.TypeEngine],
+    attrs: Optional[dict[str, Any]],
+    generic_type: GenericDataType,
+    is_dttm: bool,
+) -> None:
+    from superset.db_engine_specs.doris import DorisEngineSpec as spec
+
+    assert_column_spec(spec, native_type, sqla_type, attrs, generic_type, is_dttm)
+
+
[email protected](
+    "sqlalchemy_uri,connect_args,return_schema,return_connect_args",
+    [
+        (
+            "doris://user:password@host/db1",
+            {"param1": "some_value"},
+            "db1",
+            {"param1": "some_value"},
+        ),
+        (
+            "pydoris://user:password@host/db1",
+            {"param1": "some_value"},
+            "db1",
+            {"param1": "some_value"},
+        ),
+        (
+            "doris://user:password@host/catalog1.db1",
+            {"param1": "some_value"},
+            "catalog1.db1",
+            {"param1": "some_value"},
+        ),
+        (
+            "pydoris://user:password@host/catalog1.db1",
+            {"param1": "some_value"},
+            "catalog1.db1",
+            {"param1": "some_value"},
+        ),
+    ],
+)
+def test_adjust_engine_params(
+    sqlalchemy_uri: str,
+    connect_args: dict[str, Any],
+    return_schema: str,
+    return_connect_args: dict[str, Any],
+) -> None:
+    from superset.db_engine_specs.doris import DorisEngineSpec
+
+    url = make_url(sqlalchemy_uri)
+    returned_url, returned_connect_args = DorisEngineSpec.adjust_engine_params(
+        url, connect_args
+    )
+    assert returned_url.database == return_schema
+    assert returned_connect_args == return_connect_args
+
+
+def test_get_schema_from_engine_params() -> None:
+    """
+    Test the ``get_schema_from_engine_params`` method.
+    """
+    from superset.db_engine_specs.doris import DorisEngineSpec
+
+    assert (
+        DorisEngineSpec.get_schema_from_engine_params(
+            make_url("doris://localhost:9030/hive.test"),
+            {},
+        )
+        == "test"
+    )
+
+    assert (
+        DorisEngineSpec.get_schema_from_engine_params(
+            make_url("doris://localhost:9030/hive"),
+            {},
+        )
+        is None
+    )

Reply via email to