korbit-ai[bot] commented on code in PR #32887:
URL: https://github.com/apache/superset/pull/32887#discussion_r2016896536
##########
superset/db_engine_specs/singlestore.py:
##########
@@ -0,0 +1,536 @@
+import logging
+import re
+from datetime import datetime
+from typing import Any, Optional
+from urllib import parse
+
+from sqlalchemy import types
+from sqlalchemy.engine import URL
+
+from superset.constants import TimeGrain
+from superset.db_engine_specs.base import BaseEngineSpec, ColumnTypeMapping, LimitMethod
+from superset.models.core import Database
+from superset.models.sql_lab import Query
+from superset.utils.core import GenericDataType
+
+logger = logging.getLogger(__name__)
+
+
+class SingleStoreSpec(BaseEngineSpec):
+ engine_name = "SingleStore"
+
+ engine = "singlestoredb"
+ drivers = {"singlestoredb": "SingleStore Python client"}
+ default_driver = "singlestoredb"
+
+ limit_method = LimitMethod.FORCE_LIMIT
+ allows_joins = True
+ allows_subqueries = True
+ allows_alias_in_select = True
+ allows_alias_in_orderby = True
+ time_groupby_inline = False
+ allows_alias_to_source_column = True
+ allows_hidden_orderby_agg = True
+ allows_hidden_cc_in_orderby = False
+ allows_cte_in_subquery = True
+ allow_limit_clause = True
+ max_column_name_length = 256
+ allows_sql_comments = True
+ allows_escaped_colons = True
+ supports_file_upload = True
+ supports_dynamic_schema = True
+ disable_ssh_tunneling = False
+
+ sqlalchemy_uri_placeholder = (
+ "singlestoredb://{username}:{password}@{host}:{port}/{database}"
+ )
+
+ _time_grain_expressions = {
+ None: "{col}",
+ TimeGrain.SECOND: "DATE_TRUNC('second', {col})",
+ TimeGrain.MINUTE: "DATE_TRUNC('minute', {col})",
+ TimeGrain.HOUR: "DATE_TRUNC('hour', {col})",
+ TimeGrain.DAY: "DATE_TRUNC('day', {col})",
+ TimeGrain.WEEK: "DATE_TRUNC('week', {col})",
+ TimeGrain.MONTH: "DATE_TRUNC('month', {col})",
+ TimeGrain.QUARTER: "DATE_TRUNC('quarter', {col})",
+ TimeGrain.YEAR: "DATE_TRUNC('year', {col})",
+ }
+
+ column_type_mappings: tuple[ColumnTypeMapping, ...] = (
+ (
+ re.compile(r"^tinyint", re.IGNORECASE),
+ types.SmallInteger(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^mediumint", re.IGNORECASE),
+ types.Integer(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^year", re.IGNORECASE),
+ types.Integer(),
+ GenericDataType.NUMERIC,
+ ),
+ (
+ re.compile(r"^bit", re.IGNORECASE),
+ types.LargeBinary(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^(var)?binary", re.IGNORECASE),
+ types.LargeBinary(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^(tiny|medium|long)?blob", re.IGNORECASE),
+ types.LargeBinary(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^json", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^bson", re.IGNORECASE),
+ types.LargeBinary(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^geographypoint", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^geography", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^vector", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^enum", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ (
+ re.compile(r"^set", re.IGNORECASE),
+ types.String(),
+ GenericDataType.STRING,
+ ),
+ )
+
+ @classmethod
+ def get_function_names(
+ cls,
+ database: Database,
+ ) -> list[str]:
+ """
+        Get a list of function names that can be called on the database.
+ Used for SQL Lab autocomplete.
+
+ :param database: The database to get functions for
+ :return: A list of function names usable in the database
+ """
+
+ functions = [
+ "ABS",
+ "ACOS",
+ "ADDTIME",
+ "AES_DECRYPT",
+ "AES_ENCRYPT",
+ "AGGREGATOR_ID",
+ "ANY_VALUE",
+ "APPROX_COUNT_DISTINCT",
+ "APPROX_COUNT_DISTINCT_ACCUMULATE",
+ "APPROX_COUNT_DISTINCT_COMBINE",
+ "APPROX_COUNT_DISTINCT_ESTIMATE",
+ "APPROX_GEOGRAPHY_INTERSECTS",
+ "APPROX_PERCENTILE",
+ "ASCII",
+ "ASIN",
+ "ATAN",
+ "ATAN2",
+ "ATAN",
+ "AVG",
+ "BETWEEN",
+ "NOT",
+ "BIN",
+ "BIN_TO_UUID",
+ "BINARY",
+ "BIT_AND",
+ "BIT_COUNT",
+ "BIT_OR",
+ "BIT_XOR",
+ "BM25",
+ "BSON_ARRAY_CONTAINS_BSON",
+ "BSON_ARRAY_PUSH",
+ "BSON_ARRAY_SLICE",
+ "BSON_BUILD_ARRAY",
+ "BSON_BUILD_OBJECT",
+ "BSON_COMPARE",
+ "BSON_EXTRACT_BIGINT",
+ "BSON_EXTRACT_BOOL",
+ "BSON_EXTRACT_BSON",
+ "BSON_EXTRACT_DATETIME",
+ "BSON_EXTRACT_DOUBLE",
+ "BSON_EXTRACT_STRING",
+ "BSON_GET_TYPE",
+ "BSON_INCLUDE_MASK",
+ "BSON_EXCLUDE_MASK",
+ "BSON_LENGTH",
+ "BSON_MATCH_ANY",
+ "BSON_MATCH_ANY_EXISTS",
+ "BSON_MERGE",
+ "BSON_NORMALIZE_",
+ *"BSON_SET_BSON",
+ "BSON_UNWIND",
+ "CASE",
+ "CEIL",
+ "CHAR",
+ "CHARACTER_LENGTH",
+ "CHARSET",
+ "COALESCE",
+ "CONCAT",
+ "CONCAT_WS",
+ "CONNECTION_ID",
+ "CONV",
+ "CAST",
+ "CONVERT",
+ "CONVERT_TZ",
+ "COS",
+ "COT",
+ "COUNT",
+ "CRC32",
+ "CURRENT_DATE",
+ "CURDATE",
+ "CURRENT_TIME",
+ "CURTIME",
+ "CURRENT_TIMESTAMP",
+ "DATABASE",
+ "DATE",
+ "DATE_ADD",
+ "DATE_FORMAT",
+ "DATE_SUB",
+ "DATE_TRUNC",
+ "DATEDIFF",
+ "DAY",
+ "DAYNAME",
+ "DAYOFWEEK",
+ "DAYOFYEAR",
+ "DECODE",
+ "DEGREES",
+ "DENSE_RANK",
+ "DOT_PRODUCT",
+ "ELT",
+ "ESTIMATED_QUERY_LEAF_MEMORY",
+ "ESTIMATED_QUERY_RUNTIME",
+ "EUCLIDEAN_DISTANCE",
+ "EXP",
+ "EXTRACT",
+ "FIELD",
+ "FIRST",
+ "FIRST_VALUE",
+ "FLOOR",
+ "FORMAT",
+ "FOUND_ROWS",
+ "FROM_BASE64",
+ "FROM_DAYS",
+ "FROM_UNIXTIME",
+ "GEOGRAPHY_AREA",
+ "GEOGRAPHY_CONTAINS",
+ "GEOGRAPHY_DISTANCE",
+ "GEOGRAPHY_INTERSECTS",
+ "GEOGRAPHY_LATITUDE",
+ "GEOGRAPHY_LENGTH",
+ "GEOGRAPHY_LONGITUDE",
+ "GEOGRAPHY_POINT",
+ "GEOGRAPHY_WITHIN_DISTANCE",
+ "GET_FORMAT",
+ "GREATEST",
+ "GROUP_CONCAT",
+ "HEX",
+ "HIGHLIGHT",
+ "HOUR",
+ "IF",
+ "IN",
+ "INET_ATON",
+ "INET_NTOA",
+ "INET6_ATON",
+ "INET6_NTOA",
+ "INITCAP",
+ "INSTR",
+ "IS_BSON_NULL",
+ "IS_UUID",
+ "ISNULL",
+ "ISNUMERIC",
+ "JSON_AGG",
+ "JSON_ARRAY_CONTAINS_DOUBLE",
+ "JSON_ARRAY_CONTAINS_STRING",
+ "JSON_ARRAY_CONTAINS_JSON",
+ "JSON_ARRAY_PACK",
+ "JSON_ARRAY_PUSH_DOUBLE",
+ "JSON_ARRAY_PUSH_STRING",
+ "JSON_ARRAY_PUSH_JSON",
+ "JSON_ARRAY_UNPACK",
+ "JSON_BUILD_ARRAY",
+ "JSON_BUILD_OBJECT",
+ "JSON_DELETE_KEY",
+ "JSON_EXTRACT_DOUBLE",
+ "JSON_EXTRACT_STRING",
+ "JSON_EXTRACT_JSON",
+ "JSON_EXTRACT_BIGINT",
+ "JSON_GET_TYPE",
+ "JSON_KEYS",
+ "JSON_LENGTH",
+ "JSON_MATCH_ANY",
+ "JSON_MERGE_PATCH",
+ "JSON_PRETTY",
+ "JSON_SET_DOUBLE",
+ "JSON_SET_STRING",
+ "JSON_SET_JSON",
+ "JSON_SPLICE_DOUBLE",
+ "JSON_SPLICE_STRING",
+ "JSON_SPLICE_JSON",
+ "JSON_TO_ARRAY",
+ "LAG",
+ "LAST",
+ "LAST_DAY",
+ "LAST_INSERT_ID",
+ "LAST_VALUE",
+ "LCASE",
+ "LEAD",
+ "LEAST",
+ "LEFT",
+ "LENGTH",
+ "LIKE",
+ "LN",
+ "LOCALTIMESTAMP",
+ "LOCATE",
+ "LOG",
+ "LOG10",
+ "LOG2",
+ "LPAD",
+ "LTRIM",
+ "MATCH",
+ "MAX",
+ "MD5",
+ "MEDIAN",
+ "MICROSECOND",
+ "MIN",
+ "MINUTE",
+ "MOD",
+ "MONTH",
+ "MONTHNAME",
+ "MONTHS_BETWEEN",
+ "NOPARAM",
+ "NOW",
+ "NTH_VALUE",
+ "NTILE",
+ "NULLIF",
+ "NVL",
+ "IFNULL",
+ "PERCENT_RANK",
+ "PERCENTILE_CONT",
+ "MEDIAN",
+ "PERCENTILE_DISC",
+ "PI",
+ "POW",
+ "QUARTER",
+ "QUOTE",
+ "RADIANS",
+ "RAND",
+ "RANK",
+ "REDUCE",
+ "REGEXP_INSTR",
+ "REGEXP_MATCH",
+ "REGEXP_REPLACE",
+ "REGEXP_SUBSTR",
+ "REPLACE",
+ "REVERSE",
+ "RIGHT",
+ "RLIKE",
+ "REGEXP",
+ "ROUND",
+ "ROW_COUNT",
+ "ROW_NUMBER",
+ "RPAD",
+ "RTRIM",
+ "SCALAR_VECTOR_MUL",
+ "SEC_TO_TIME",
+ "SECOND",
+ "SECRET",
+ "SET",
+ "SHA1",
+ "SHA2",
+ "SIGMOID",
+ "SIGN",
+ "SIN",
+ "SLEEP",
+ "SPLIT",
+ "SQRT",
+ "STD",
+ "STDDEV",
+ "STDDEV_POP",
+ "STDDEV_SAMP",
+ "STR_TO_DATE",
+ "strcmp",
+ "STRING_BYTES",
+ "SUBSTRING",
+ "SUBSTRING_INDEX",
+ "SUM",
+ "SYS_GUID",
+ "UUID",
+ "TAN",
+ "TIME",
+ "TIME_BUCKET",
+ "TIME_FORMAT",
+ "TIME_TO_SEC",
+ "TIMEDIFF",
+ "TIMESTAMP",
+ "TIMESTAMPADD",
+ "TIMESTAMPDIFF",
+ "TO_BASE64",
+ "TO_CHAR",
+ "TO_DATE",
+ "TO_DAYS",
+ "TO_JSON",
+ "TO_NUMBER",
+ "TO_SECONDS",
+ "TO_TIMESTAMP",
+ "TRIM",
+ "TRUNC",
+ "TRUNCATE",
+ "UCASE",
+ "UNHEX",
+ "UNIX_TIMESTAMP",
+ "USER",
+ "UTC_DATE",
+ "UTC_TIME",
+ "UTC_TIMESTAMP",
+ "UUID_TO_BIN",
+ "VARIANCE",
+ "VAR_SAMP",
+ "VECTOR_ADD",
+ "VECTOR_ELEMENTS_SUM",
+ "VECTOR_KTH_ELEMENT",
+ "VECTOR_MUL",
+ "VECTOR_NUM_ELEMENTS",
+ "VECTOR_SORT",
+ "VECTOR_SUB",
+ "VECTOR_SUBVECTOR",
+ "VECTOR_SUM",
+ "WEEK",
+ "WEEKDAY",
+ "YEAR",
+ ]
+
+        if (database_name := cls.get_default_schema(database, None)) is not None:
+ df = database.get_df(
+ f"SHOW FUNCTIONS IN `{database_name.replace('`', '``')}`"
+ )
Review Comment:
### Inefficient Function Name Resolution
<details>
<summary>Tell me more</summary>
###### What is the issue?
The get_function_names method maintains a large hardcoded list of functions
and then additionally queries the database to fetch more functions, potentially
duplicating function names and performing unnecessary database calls.
###### Why this matters
This approach adds overhead by making a database round-trip for every
function name lookup, even though most standard functions are already known.
The database query could be expensive when performed frequently, especially in
autocomplete scenarios.
###### Suggested change ∙ *Feature Preview*
Cache the database-specific functions on first retrieval and merge with the
standard functions. Implement as:
```python
@classmethod
def get_function_names(cls, database: Database) -> list[str]:
    if not hasattr(cls, '_cached_db_functions'):
        cls._cached_db_functions = {}
    db_id = database.id
    if db_id not in cls._cached_db_functions:
        # Fetch from the database only once and cache the result
        if (database_name := cls.get_default_schema(database, None)) is not None:
            try:
                df = database.get_df(
                    f"SHOW FUNCTIONS IN `{database_name.replace('`', '``')}`"
                )
                function_name_col = f"Functions_in_{database_name}"
                if function_name_col in df:
                    cls._cached_db_functions[db_id] = df[function_name_col].tolist()
                else:
                    cls._cached_db_functions[db_id] = []
            except Exception:
                cls._cached_db_functions[db_id] = []
                logger.error("Error fetching database functions", exc_info=True)
    # `base_functions` refers to the hardcoded list above, moved to a class
    # attribute as part of this refactor.
    return cls.base_functions + cls._cached_db_functions.get(db_id, [])
```
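One wrinkle worth noting: the merged list can still contain duplicates when a UDF shares a name with a built-in. A dict-based deduplication keeps the result unique while preserving order; a minimal sketch with made-up function names:
```python
# Minimal sketch of order-preserving deduplication; the names below are
# made up for illustration.
base_functions = ["ABS", "AVG", "SUM"]
db_functions = ["MY_UDF", "AVG"]  # a UDF list that repeats a built-in name

# dict.fromkeys keeps the first occurrence of each name and preserves order
merged = list(dict.fromkeys(base_functions + db_functions))
print(merged)  # ['ABS', 'AVG', 'SUM', 'MY_UDF']
```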
</details>
##########
superset/db_engine_specs/singlestore.py:
##########
@@ -0,0 +1,536 @@
+        if (database_name := cls.get_default_schema(database, None)) is not None:
+            df = database.get_df(
+                f"SHOW FUNCTIONS IN `{database_name.replace('`', '``')}`"
+            )
+
+ function_name_col = f"Functions_in_{database_name}"
+ if function_name_col in df:
+ functions += df[function_name_col].tolist()
+ else:
+ columns = df.columns.values.tolist()
+ logger.error(
+ "Payload from `SHOW FUNCTIONS` has the incorrect format. "
+ "Expected column `%s`, found: %s.",
+ function_name_col,
+ ", ".join(columns),
+ exc_info=True,
+ )
+
+ return functions
+
+ @classmethod
+ def epoch_to_dttm(cls) -> str:
+ return "from_unixtime({col})"
+
+ @classmethod
+ def convert_dttm(
+        cls, target_type: str, dttm: datetime, db_extra: Optional[dict[str, Any]] = None
+ ) -> Optional[str]:
+ sqla_type = cls.get_sqla_column_type(target_type)
+
+ if isinstance(sqla_type, types.Date):
+ return f"CAST('{dttm.date().isoformat()}' AS DATE)"
+ if isinstance(sqla_type, types.TIMESTAMP):
+ return f"""('{dttm.isoformat(sep=" ", timespec="microseconds")}'
:> TIMESTAMP(6))""" # noqa: E501
+ if isinstance(sqla_type, types.DateTime):
+ return f"""CAST('{dttm.isoformat(sep=" ",
timespec="microseconds")}' AS DATETIME(6))""" # noqa: E501
+ if isinstance(sqla_type, types.Time):
+ return f"""CAST('{dttm.strftime("%H:%M:%S.%f")}' AS TIME(6))"""
+
+ return None
+
+ @classmethod
+ def adjust_engine_params(
+ cls,
+ uri: URL,
+ connect_args: dict[str, Any],
+ catalog: Optional[str] = None,
+ schema: Optional[str] = None,
+ ) -> tuple[URL, dict[str, Any]]:
+ uri, new_connect_args = super().adjust_engine_params(
+ uri,
+ connect_args,
+ catalog,
+ schema,
+ )
+
+ if schema:
+ uri = uri.set(database=parse.quote(schema, safe=""))
+
+ return uri, new_connect_args
+
+ @classmethod
+ def get_schema_from_engine_params(
+ cls,
+ sqlalchemy_uri: URL,
+ connect_args: dict[str, Any],
+ ) -> Optional[str]:
+ """
+ Return the configured schema.
+
+ A MySQL database is a SQLAlchemy schema.
+ """
+ return parse.unquote(sqlalchemy_uri.database)
Review Comment:
### Missing None Check in Schema Extraction
<details>
<summary>Tell me more</summary>
###### What is the issue?
The get_schema_from_engine_params method doesn't handle the case where
sqlalchemy_uri.database is None.
###### Why this matters
This could lead to a TypeError when trying to unquote None, causing
unexpected crashes in the application.
###### Suggested change ∙ *Feature Preview*
Add None check before unquoting:
```python
return parse.unquote(sqlalchemy_uri.database) if sqlalchemy_uri.database else None
```
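To see the failure mode concretely, SQLAlchemy leaves `URL.database` unset when the URI has no path segment; a quick sketch (connection values are made up):
```python
from sqlalchemy.engine import make_url

# With a database segment in the URI, .database is populated.
assert make_url("singlestoredb://u:p@host:3306/analytics").database == "analytics"

# Without one, .database is None, and parse.unquote(None) would raise TypeError.
assert make_url("singlestoredb://u:p@host:3306").database is None
```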
</details>
##########
superset/db_engine_specs/singlestore.py:
##########
@@ -0,0 +1,536 @@
+ @classmethod
+ def get_schema_from_engine_params(
+ cls,
+ sqlalchemy_uri: URL,
+ connect_args: dict[str, Any],
+ ) -> Optional[str]:
+ """
+ Return the configured schema.
+
+ A MySQL database is a SQLAlchemy schema.
+ """
Review Comment:
### Incorrect database reference in docstring
<details>
<summary>Tell me more</summary>
###### What is the issue?
The docstring contains incorrect/confusing information by referencing MySQL
when this is a SingleStore spec.
###### Why this matters
Mentioning MySQL in SingleStore-specific code could mislead developers about
the database system they're working with.
###### Suggested change ∙ *Feature Preview*
"""Return the configured schema.
In SingleStore, like MySQL, the database name serves as the schema name in SQLAlchemy.
"""
</details>
##########
superset/db_engine_specs/singlestore.py:
##########
@@ -0,0 +1,536 @@
+        if (database_name := cls.get_default_schema(database, None)) is not None:
+ df = database.get_df(
+ f"SHOW FUNCTIONS IN `{database_name.replace('`', '``')}`"
+ )
+
+ function_name_col = f"Functions_in_{database_name}"
+ if function_name_col in df:
+ functions += df[function_name_col].tolist()
+ else:
+ columns = df.columns.values.tolist()
+ logger.error(
+ "Payload from `SHOW FUNCTIONS` has the incorrect format. "
+ "Expected column `%s`, found: %s.",
+ function_name_col,
+ ", ".join(columns),
+ exc_info=True,
+ )
Review Comment:
### Unnecessary traceback logging
<details>
<summary>Tell me more</summary>
###### What is the issue?
The error logging includes exc_info=True but there is no active exception
being handled, making the traceback information empty and unhelpful.
###### Why this matters
Including exc_info=True without an active exception context adds unnecessary
overhead and provides no additional debugging value.
###### Suggested change ∙ *Feature Preview*
```python
columns = df.columns.values.tolist()
logger.error(
    "Payload from `SHOW FUNCTIONS` has the incorrect format. "
    "Expected column `%s`, found: %s.",
    function_name_col,
    ", ".join(columns),
)
```
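For reference, outside an `except` block there is no active exception for `exc_info=True` to capture, so the log record only gains a placeholder "NoneType: None" line; a small sketch:
```python
import logging

logging.basicConfig(level=logging.ERROR)
log = logging.getLogger("demo")

# No active exception: exc_info=True appends only "NoneType: None".
log.error("bad payload", exc_info=True)

try:
    raise ValueError("boom")
except ValueError:
    # Active exception: exc_info=True captures the real traceback.
    log.error("bad payload", exc_info=True)
```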
</details>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]