korbit-ai[bot] commented on code in PR #32887:
URL: https://github.com/apache/superset/pull/32887#discussion_r2016896536
##########
superset/db_engine_specs/singlestore.py:
##########
@@ -0,0 +1,536 @@
+import logging
+import re
+from datetime import datetime
+from typing import Any, Optional
+from urllib import parse
+
+from sqlalchemy import types
+from sqlalchemy.engine import URL
+
+from superset.constants import TimeGrain
+from superset.db_engine_specs.base import BaseEngineSpec, ColumnTypeMapping, LimitMethod
+from superset.models.core import Database
+from superset.models.sql_lab import Query
+from superset.utils.core import GenericDataType
+
+logger = logging.getLogger(__name__)
+
+
+class SingleStoreSpec(BaseEngineSpec):
+ engine_name = "SingleStore"
+
+ engine = "singlestoredb"
+ drivers = {"singlestoredb": "SingleStore Python client"}
+ default_driver = "singlestoredb"
+
+ limit_method = LimitMethod.FORCE_LIMIT
+ allows_joins = True
+ allows_subqueries = True
+ allows_alias_in_select = True
+ allows_alias_in_orderby = True
+ time_groupby_inline = False
+ allows_alias_to_source_column = True
+ allows_hidden_orderby_agg = True
+ allows_hidden_cc_in_orderby = False
+ allows_cte_in_subquery = True
+ allow_limit_clause = True
+ max_column_name_length = 256
+ allows_sql_comments = True
+ allows_escaped_colons = True
+ supports_file_upload = True
+ supports_dynamic_schema = True
+ disable_ssh_tunneling = False
+
+ sqlalchemy_uri_placeholder = (
+ "singlestoredb://{username}:{password}@{host}:{port}/{database}"
+ )
+
+ _time_grain_expressions = {
+ None: "{col}",
+ TimeGrain.SECOND: "DATE_TRUNC('second', {col})",
+ TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})",
+ TimeGrain.HOUR: "DATE_TRUNC('hour', {col})",
+ TimeGrain.DAY: "DATE_TRUNC('day', {col})",
+ TimeGrain.WEEK: "DATE_TRUNC('week', {col})",
+ TimeGrain.MONTH: "DATE_TRUNC('month', {col})",
+ TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})",
+ TimeGrain.YEAR: "DATE_TRUNC('year', {col})",
+ }
+
+ column_type_mappings: tuple[ColumnTypeMapping, ...] = (
+ (
+ re.compile(r"^tinyint", re.IGNORECASE),
+ types.SmallInteger(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^mediumint", re.IGNORECASE),
+ types.Integer(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^year", re.IGNORECASE),
+ types.Integer(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^bit", re.IGNORECASE),
+ types.LargeBinary(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^(var)?binary", re.IGNORECASE),
+ types.LargeBinary(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^(tiny|medium|long)?blob", re.IGNORECASE),
+ types.LargeBinary(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^json", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^bson", re.IGNORECASE),
+ types.LargeBinary(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^geographypoint", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^geography", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^vector", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^enum", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^set", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ )
+
+ @classmethod
+ def get_function_names(
+ cls,
+ database: Database,
+ ) -> list[str]:
+ """
+        Get a list of function names that can be called on the database.
+ Used for SQL Lab autocomplete.
+
+ :param database: The database to get functions for
+ :return: A list of function names usable in the database
+ """
+
+ functions = [
+ "ABS",
+ "ACOS",
+ "ADDTIME",
+ "AES_DECRYPT",
+ "AES_ENCRYPT",
+ "AGGREGATOR_ID",
+ "ANY_VALUE",
+ "APPROX_COUNT_DISTINCT",
+ "APPROX_COUNT_DISTINCT_ACCUMULATE",
+ "APPROX_COUNT_DISTINCT_COMBINE",
+ "APPROX_COUNT_DISTINCT_ESTIMATE",
+ "APPROX_GEOGRAPHY_INTERSECTS",
+ "APPROX_PERCENTILE",
+ "ASCII",
+ "ASIN",
+ "ATAN",
+ "ATAN2",
+ "ATAN",
+ "AVG",
+ "BETWEEN",
+ "NOT",
+ "BIN",
+ "BIN_TO_UUID",
+ "BINARY",
+ "BIT_AND",
+ "BIT_COUNT",
+ "BIT_OR",
+ "BIT_XOR",
+ "BM25",
+ "BSON_ARRAY_CONTAINS_BSON",
+ "BSON_ARRAY_PUSH",
+ "BSON_ARRAY_SLICE",
+ "BSON_BUILD_ARRAY",
+ "BSON_BUILD_OBJECT",
+ "BSON_COMPARE",
+ "BSON_EXTRACT_BIGINT",
+ "BSON_EXTRACT_BOOL",
+ "BSON_EXTRACT_BSON",
+ "BSON_EXTRACT_DATETIME",
+ "BSON_EXTRACT_DOUBLE",
+ "BSON_EXTRACT_STRING",
+ "BSON_GET_TYPE",
+ "BSON_INCLUDE_MASK",
+ "BSON_EXCLUDE_MASK",
+ "BSON_LENGTH",
+ "BSON_MATCH_ANY",
+ "BSON_MATCH_ANY_EXISTS",
+ "BSON_MERGE",
+ "BSON_NORMALIZE_",
+ *"BSON_SET_BSON",
+ "BSON_UNWIND",
+ "CASE",
+ "CEIL",
+ "CHAR",
+ "CHARACTER_LENGTH",
+ "CHARSET",
+ "COALESCE",
+ "CONCAT",
+ "CONCAT_WS",
+ "CONNECTION_ID",
+ "CONV",
+ "CAST",
+ "CONVERT",
+ "CONVERT_TZ",
+ "COS",
+ "COT",
+ "COUNT",
+ "CRC32",
+ "CURRENT_DATE",
+ "CURDATE",
+ "CURRENT_TIME",
+ "CURTIME",
+ "CURRENT_TIMESTAMP",
+ "DATABASE",
+ "DATE",
+ "DATE_ADD",
+ "DATE_FORMAT",
+ "DATE_SUB",
+ "DATE_TRUNC",
+ "DATEDIFF",
+ "DAY",
+ "DAYNAME",
+ "DAYOFWEEK",
+ "DAYOFYEAR",
+ "DECODE",
+ "DEGREES",
+ "DENSE_RANK",
+ "DOT_PRODUCT",
+ "ELT",
+ "ESTIMATED_QUERY_LEAF_MEMORY",
+ "ESTIMATED_QUERY_RUNTIME",
+ "EUCLIDEAN_DISTANCE",
+ "EXP",
+ "EXTRACT",
+ "FIELD",
+ "FIRST",
+ "FIRST_VALUE",
+ "FLOOR",
+ "FORMAT",
+ "FOUND_ROWS",
+ "FROM_BASE64",
+ "FROM_DAYS",
+ "FROM_UNIXTIME",
+ "GEOGRAPHY_AREA",
+ "GEOGRAPHY_CONTAINS",
+ "GEOGRAPHY_DISTANCE",
+ "GEOGRAPHY_INTERSECTS",
+ "GEOGRAPHY_LATITUDE",
+ "GEOGRAPHY_LENGTH",
+ "GEOGRAPHY_LONGITUDE",
+ "GEOGRAPHY_POINT",
+ "GEOGRAPHY_WITHIN_DISTANCE",
+ "GET_FORMAT",
+ "GREATEST",
+ "GROUP_CONCAT",
+ "HEX",
+ "HIGHLIGHT",
+ "HOUR",
+ "IF",
+ "IN",
+ "INET_ATON",
+ "INET_NTOA",
+ "INET6_ATON",
+ "INET6_NTOA",
+ "INITCAP",
+ "INSTR",
+ "IS_BSON_NULL",
+ "IS_UUID",
+ "ISNULL",
+ "ISNUMERIC",
+ "JSON_AGG",
+ "JSON_ARRAY_CONTAINS_DOUBLE",
+ "JSON_ARRAY_CONTAINS_STRING",
+ "JSON_ARRAY_CONTAINS_JSON",
+ "JSON_ARRAY_PACK",
+ "JSON_ARRAY_PUSH_DOUBLE",
+ "JSON_ARRAY_PUSH_STRING",
+ "JSON_ARRAY_PUSH_JSON",
+ "JSON_ARRAY_UNPACK",
+ "JSON_BUILD_ARRAY",
+ "JSON_BUILD_OBJECT",
+ "JSON_DELETE_KEY",
+ "JSON_EXTRACT_DOUBLE",
+ "JSON_EXTRACT_STRING",
+ "JSON_EXTRACT_JSON",
+ "JSON_EXTRACT_BIGINT",
+ "JSON_GET_TYPE",
+ "JSON_KEYS",
+ "JSON_LENGTH",
+ "JSON_MATCH_ANY",
+ "JSON_MERGE_PATCH",
+ "JSON_PRETTY",
+ "JSON_SET_DOUBLE",
+ "JSON_SET_STRING",
+ "JSON_SET_JSON",
+ "JSON_SPLICE_DOUBLE",
+ "JSON_SPLICE_STRING",
+ "JSON_SPLICE_JSON",
+ "JSON_TO_ARRAY",
+ "LAG",
+ "LAST",
+ "LAST_DAY",
+ "LAST_INSERT_ID",
+ "LAST_VALUE",
+ "LCASE",
+ "LEAD",
+ "LEAST",
+ "LEFT",
+ "LENGTH",
+ "LIKE",
+ "LN",
+ "LOCALTIMESTAMP",
+ "LOCATE",
+ "LOG",
+ "LOG10",
+ "LOG2",
+ "LPAD",
+ "LTRIM",
+ "MATCH",
+ "MAX",
+ "MD5",
+ "MEDIAN",
+ "MICROSECOND",
+ "MIN",
+ "MINUTE",
+ "MOD",
+ "MONTH",
+ "MONTHNAME",
+ "MONTHS_BETWEEN",
+ "NOPARAM",
+ "NOW",
+ "NTH_VALUE",
+ "NTILE",
+ "NULLIF",
+ "NVL",
+ "IFNULL",
+ "PERCENT_RANK",
+ "PERCENTILE_CONT",
+ "MEDIAN",
+ "PERCENTILE_DISC",
+ "PI",
+ "POW",
+ "QUARTER",
+ "QUOTE",
+ "RADIANS",
+ "RAND",
+ "RANK",
+ "REDUCE",
+ "REGEXP_INSTR",
+ "REGEXP_MATCH",
+ "REGEXP_REPLACE",
+ "REGEXP_SUBSTR",
+ "REPLACE",
+ "REVERSE",
+ "RIGHT",
+ "RLIKE",
+ "REGEXP",
+ "ROUND",
+ "ROW_COUNT",
+ "ROW_NUMBER",
+ "RPAD",
+ "RTRIM",
+ "SCALAR_VECTOR_MUL",
+ "SEC_TO_TIME",
+ "SECOND",
+ "SECRET",
+ "SET",
+ "SHA1",
+ "SHA2",
+ "SIGMOID",
+ "SIGN",
+ "SIN",
+ "SLEEP",
+ "SPLIT",
+ "SQRT",
+ "STD",
+ "STDDEV",
+ "STDDEV_POP",
+ "STDDEV_SAMP",
+ "STR_TO_DATE",
+ "strcmp",
+ "STRING_BYTES",
+ "SUBSTRING",
+ "SUBSTRING_INDEX",
+ "SUM",
+ "SYS_GUID",
+ "UUID",
+ "TAN",
+ "TIME",
+ "TIME_BUCKET",
+ "TIME_FORMAT",
+ "TIME_TO_SEC",
+ "TIMEDIFF",
+ "TIMESTAMP",
+ "TIMESTAMPADD",
+ "TIMESTAMPDIFF",
+ "TO_BASE64",
+ "TO_CHAR",
+ "TO_DATE",
+ "TO_DAYS",
+ "TO_JSON",
+ "TO_NUMBER",
+ "TO_SECONDS",
+ "TO_TIMESTAMP",
+ "TRIM",
+ "TRUNC",
+ "TRUNCATE",
+ "UCASE",
+ "UNHEX",
+ "UNIX_TIMESTAMP",
+ "USER",
+ "UTC_DATE",
+ "UTC_TIME",
+ "UTC_TIMESTAMP",
+ "UUID_TO_BIN",
+ "VARIANCE",
+ "VAR_SAMP",
+ "VECTOR_ADD",
+ "VECTOR_ELEMENTS_SUM",
+ "VECTOR_KTH_ELEMENT",
+ "VECTOR_MUL",
+ "VECTOR_NUM_ELEMENTS",
+ "VECTOR_SORT",
+ "VECTOR_SUB",
+ "VECTOR_SUBVECTOR",
+ "VECTOR_SUM",
+ "WEEK",
+ "WEEKDAY",
+ "YEAR",
+ ]
+
+        if (database_name := cls.get_default_schema(database, None)) is not None:
+ df = database.get_df(
+ f"SHOW FUNCTIONS IN `{database_name.replace('`', '``')}`"
+ )
Review Comment:
### Inefficient Function Name Resolution
<details>
<summary>Tell me more</summary>
###### What is the issue?
The get_function_names method maintains a large hardcoded list of functions
and then additionally queries the database to fetch more functions, potentially
duplicating function names and performing unnecessary database calls.
###### Why this matters
This approach adds overhead by making a database round-trip for every
function name lookup, even though most standard functions are already known.
The database query could be expensive when performed frequently, especially in
autocomplete scenarios.
###### Suggested change ∙ *Feature Preview*
Cache the database-specific functions on first retrieval and merge with the
standard functions. Implement as:
```python
@classmethod
def get_function_names(cls, database: Database) -> list[str]:
    if not hasattr(cls, '_cached_db_functions'):
        cls._cached_db_functions = {}
    db_id = database.id
    if db_id not in cls._cached_db_functions:
        # Fetch from the database only once and cache the result
        if (database_name := cls.get_default_schema(database, None)) is not None:
            try:
                df = database.get_df(
                    f"SHOW FUNCTIONS IN `{database_name.replace('`', '``')}`"
                )
                function_name_col = f"Functions_in_{database_name}"
                if function_name_col in df:
                    cls._cached_db_functions[db_id] = df[function_name_col].tolist()
                else:
                    cls._cached_db_functions[db_id] = []
            except Exception:
                cls._cached_db_functions[db_id] = []
                logger.error("Error fetching database functions", exc_info=True)
    # `base_functions` refers to the hardcoded list above, moved to a class
    # attribute as part of this refactor.
    return cls.base_functions + cls._cached_db_functions.get(db_id, [])
```
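One wrinkle worth noting: the merged list can still contain duplicates when a UDF shares a name with a built-in. A dict-based deduplication keeps the result unique while preserving order; a minimal sketch with made-up function names:
```python
# Minimal sketch of order-preserving deduplication; the names below are
# made up for illustration.
base_functions = ["ABS", "AVG", "SUM"]
db_functions = ["MY_UDF", "AVG"]  # a UDF list that repeats a built-in name

# dict.fromkeys keeps the first occurrence of each name and preserves order
merged = list(dict.fromkeys(base_functions + db_functions))
print(merged)  # ['ABS', 'AVG', 'SUM', 'MY_UDF']
```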
</details>
##########
superset/db_engine_specs/singlestore.py:
##########
@@ -0,0 +1,536 @@
+        if (database_name := cls.get_default_schema(database, None)) is not None:
+            df = database.get_df(
+                f"SHOW FUNCTIONS IN `{database_name.replace('`', '``')}`"
+            )
+
+ function_name_col = f"Functions_in_{database_name}"
+ if function_name_col in df:
+ functions += df[function_name_col].tolist()
+ else:
+ columns = df.columns.values.tolist()
+ logger.error(
+ "Payload from `SHOW FUNCTIONS` has the incorrect format. "
+ "Expected column `%s`, found: %s.",
+ function_name_col,
+ ", ".join(columns),
+ exc_info=True,
+ )
+
+ return functions
+
+ @classmethod
+ def epoch_to_dttm(cls) -> str:
+ return "from_unixtime({col})"
+
+ @classmethod
+ def convert_dttm(
+        cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
+ ) -> Optional[str]:
+ sqla_type = cls.get_sqla_column_type(target_type)
+
+ if isinstance(sqla_type, types.Date):
+ return f"CAST('{dttm.date().isoformat()}' AS DATE)"
+ if isinstance(sqla_type, types.TIMESTAMP):
+ return f"""('{dttm.isoformat(sep=" ", timespec="microseconds")}'
:> TIMESTAMP(6))""" # noqa: E501
+ if isinstance(sqla_type, types.DateTime):
+ return f"""CAST('{dttm.isoformat(sep=" ",
timespec="microseconds")}' AS DATETIME(6))""" # noqa: E501
+ if isinstance(sqla_type, types.Time):
+ return f"""CAST('{dttm.strftime("%H:%M:%S.%f")}' AS TIME(6))"""
+
+ return None
+
+ @classmethod
+ def adjust_engine_params(
+ cls,
+ uri: URL,
+ connect_args: dict[str, Any],
+ catalog: Optional[str] = None,
+ schema: Optional[str] = None,
+ ) -> tuple[URL, dict[str, Any]]:
+ uri, new_connect_args = super().adjust_engine_params(
+ uri,
+ connect_args,
+ catalog,
+ schema,
+ )
+
+ if schema:
+ uri = uri.set(database=parse.quote(schema, safe=""))
+
+ return uri, new_connect_args
+
+ @classmethod
+ def get_schema_from_engine_params(
+ cls,
+ sqlalchemy_uri: URL,
+ connect_args: dict[str, Any],
+ ) -> Optional[str]:
+ """
+ Return the configured schema.
+
+ A MySQL database is a SQLAlchemy schema.
+ """
+ return parse.unquote(sqlalchemy_uri.database)
Review Comment:
### Missing None Check in Schema Extraction
<details>
<summary>Tell me more</summary>
###### What is the issue?
The get_schema_from_engine_params method doesn't handle the case where
sqlalchemy_uri.database is None.
###### Why this matters
This could lead to a TypeError when trying to unquote None, causing
unexpected crashes in the application.
###### Suggested change ∙ *Feature Preview*
Add None check before unquoting:
```python
return parse.unquote(sqlalchemy_uri.database) if sqlalchemy_uri.database else None
```
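To see the failure mode concretely, SQLAlchemy leaves `URL.database` unset when the URI has no path segment; a quick sketch (connection values are made up):
```python
from sqlalchemy.engine import make_url

# With a database segment in the URI, .database is populated.
assert make_url("singlestoredb://u:p@host:3306/analytics").database == "analytics"

# Without one, .database is None, and parse.unquote(None) would raise TypeError.
assert make_url("singlestoredb://u:p@host:3306").database is None
```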
</details>
##########
superset/db_engine_specs/singlestore.py:
##########
@@ -0,0 +1,536 @@
+ @classmethod
+ def get_schema_from_engine_params(
+ cls,
+ sqlalchemy_uri: URL,
+ connect_args: dict[str, Any],
+ ) -> Optional[str]:
+ """
+ Return the configured schema.
+
+ A MySQL database is a SQLAlchemy schema.
+ """
Review Comment:
### Incorrect database reference in docstring
<details>
<summary>Tell me more</summary>
###### What is the issue?
The docstring contains incorrect/confusing information by referencing MySQL
when this is a SingleStore spec.
###### Why this matters
Mentioning MySQL in SingleStore-specific code could mislead developers about
the database system they're working with.
###### Suggested change ∙ *Feature Preview*
"""Return the configured schema.
In SingleStore, like MySQL, the database name serves as the schema name in SQLAlchemy.
"""
</details>
##########
superset/db_engine_specs/singlestore.py:
##########
@@ -0,0 +1,536 @@
+        if (database_name := cls.get_default_schema(database, None)) is not None:
+ df = database.get_df(
+ f"SHOW FUNCTIONS IN `{database_name.replace('`', '``')}`"
+ )
+
+ function_name_col = f"Functions_in_{database_name}"
+ if function_name_col in df:
+ functions += df[function_name_col].tolist()
+ else:
+ columns = df.columns.values.tolist()
+ logger.error(
+ "Payload from `SHOW FUNCTIONS` has the incorrect format. "
+ "Expected column `%s`, found: %s.",
+ function_name_col,
+ ", ".join(columns),
+ exc_info=True,
+ )
Review Comment:
### Unnecessary traceback logging
<details>
<summary>Tell me more</summary>
###### What is the issue?
The error logging includes exc_info=True but there is no active exception
being handled, making the traceback information empty and unhelpful.
###### Why this matters
Including exc_info=True without an active exception context adds unnecessary
overhead and provides no additional debugging value.
###### Suggested change ∙ *Feature Preview*
```python
columns = df.columns.values.tolist()
logger.error(
    "Payload from `SHOW FUNCTIONS` has the incorrect format. "
    "Expected column `%s`, found: %s.",
    function_name_col,
    ", ".join(columns),
)
```
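For reference, outside an `except` block there is no active exception for `exc_info=True` to capture, so the log record only gains a placeholder "NoneType: None" line; a small sketch:
```python
import logging

logging.basicConfig(level=logging.ERROR)
log = logging.getLogger("demo")

# No active exception: exc_info=True appends only "NoneType: None".
log.error("bad payload", exc_info=True)

try:
    raise ValueError("boom")
except ValueError:
    # Active exception: exc_info=True captures the real traceback.
    log.error("bad payload", exc_info=True)
```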
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]