[GitHub] [superset] villebro commented on a change in pull request #13294: feat(explore): Postgres datatype conversion

GitBox Thu, 11 Mar 2021 01:45:29 -0800


villebro commented on a change in pull request #13294:
URL: https://github.com/apache/superset/pull/13294#discussion_r592204273




##########
File path: superset/db_engine_specs/base.py
##########
@@ -162,21 +242,17 @@ class BaseEngineSpec:  # pylint: 
disable=too-many-public-methods
 
     # default matching patterns to convert database specific column types to
     # more generic types
-    db_column_types: Dict[utils.GenericDataType, Tuple[Pattern[str], ...]] = {
-        utils.GenericDataType.NUMERIC: (
+    db_column_types: Dict[GenericDataType, Tuple[Pattern[str], ...]] = {
+        GenericDataType.NUMERIC: (

Review comment:
       Do we need this map anymore? I believe this can be achieved with 
`get_column_spec`

##########
File path: superset/db_engine_specs/base.py
##########
@@ -210,7 +286,7 @@ def get_dbapi_mapped_exception(cls, exception: Exception) 
-> Exception:
 
     @classmethod
     def is_db_column_type_match(
-        cls, db_column_type: Optional[str], target_column_type: 
utils.GenericDataType
+        cls, db_column_type: Optional[str], target_column_type: GenericDataType

Review comment:
       I believe we can remove this method, as I don't see it being used anymore

##########
File path: tests/db_engine_specs/mssql_tests.py
##########
@@ -24,49 +24,55 @@
 
 from superset.db_engine_specs.base import BaseEngineSpec
 from superset.db_engine_specs.mssql import MssqlEngineSpec
+from superset.utils.core import GenericDataType
 from tests.db_engine_specs.base_tests import TestDbEngineSpec
 
 
 class TestMssqlEngineSpec(TestDbEngineSpec):
     def test_mssql_column_types(self):
-        def assert_type(type_string, type_expected):
-            type_assigned = MssqlEngineSpec.get_sqla_column_type(type_string)
+        def assert_type(type_string, type_expected, generic_type_expected):
             if type_expected is None:
+                type_assigned = 
MssqlEngineSpec.get_sqla_column_type(type_string)
                 self.assertIsNone(type_assigned)
             else:
-                self.assertIsInstance(type_assigned, type_expected)
+                column_spec = MssqlEngineSpec.get_sqla_column_type(type_string)
+                if not column_spec is None:
+                    (type_assigned, generic_type_assigned,) = column_spec
+                    self.assertIsInstance(type_assigned, type_expected)
+                    self.assertIsInstance(generic_type_assigned, 
generic_type_expected)
 
-        assert_type("INT", None)
-        assert_type("STRING", String)
-        assert_type("CHAR(10)", String)
-        assert_type("VARCHAR(10)", String)
-        assert_type("TEXT", String)
-        assert_type("NCHAR(10)", UnicodeText)
-        assert_type("NVARCHAR(10)", UnicodeText)
-        assert_type("NTEXT", UnicodeText)
+        # assert_type("STRING", String, GenericDataType.STRING)
+        # assert_type("CHAR(10)", String, GenericDataType.STRING)
+        # assert_type("VARCHAR(10)", String, GenericDataType.STRING)
+        # assert_type("TEXT", String, GenericDataType.STRING)
+        # assert_type("NCHAR(10)", UnicodeText, GenericDataType.STRING)
+        # assert_type("NVARCHAR(10)", UnicodeText, GenericDataType.STRING)
+        # assert_type("NTEXT", UnicodeText, GenericDataType.STRING)

Review comment:
       Why are these removed? couldn't we assert this using `get_column_spec`?

##########
File path: tests/db_engine_specs/mssql_tests.py
##########
@@ -24,49 +24,55 @@
 
 from superset.db_engine_specs.base import BaseEngineSpec
 from superset.db_engine_specs.mssql import MssqlEngineSpec
+from superset.utils.core import GenericDataType
 from tests.db_engine_specs.base_tests import TestDbEngineSpec
 
 
 class TestMssqlEngineSpec(TestDbEngineSpec):
     def test_mssql_column_types(self):
-        def assert_type(type_string, type_expected):
-            type_assigned = MssqlEngineSpec.get_sqla_column_type(type_string)
+        def assert_type(type_string, type_expected, generic_type_expected):
             if type_expected is None:
+                type_assigned = 
MssqlEngineSpec.get_sqla_column_type(type_string)
                 self.assertIsNone(type_assigned)
             else:
-                self.assertIsInstance(type_assigned, type_expected)
+                column_spec = MssqlEngineSpec.get_sqla_column_type(type_string)
+                if not column_spec is None:
+                    (type_assigned, generic_type_assigned,) = column_spec
+                    self.assertIsInstance(type_assigned, type_expected)
+                    self.assertIsInstance(generic_type_assigned, 
generic_type_expected)
 
-        assert_type("INT", None)
-        assert_type("STRING", String)
-        assert_type("CHAR(10)", String)
-        assert_type("VARCHAR(10)", String)
-        assert_type("TEXT", String)
-        assert_type("NCHAR(10)", UnicodeText)
-        assert_type("NVARCHAR(10)", UnicodeText)
-        assert_type("NTEXT", UnicodeText)
+        # assert_type("STRING", String, GenericDataType.STRING)
+        # assert_type("CHAR(10)", String, GenericDataType.STRING)
+        # assert_type("VARCHAR(10)", String, GenericDataType.STRING)
+        # assert_type("TEXT", String, GenericDataType.STRING)
+        # assert_type("NCHAR(10)", UnicodeText, GenericDataType.STRING)
+        # assert_type("NVARCHAR(10)", UnicodeText, GenericDataType.STRING)
+        # assert_type("NTEXT", UnicodeText, GenericDataType.STRING)
 
-    def test_where_clause_n_prefix(self):
-        dialect = mssql.dialect()
-        spec = MssqlEngineSpec
-        str_col = column("col", type_=spec.get_sqla_column_type("VARCHAR(10)"))
-        unicode_col = column("unicode_col", 
type_=spec.get_sqla_column_type("NTEXT"))
-        tbl = table("tbl")
-        sel = (
-            select([str_col, unicode_col])
-            .select_from(tbl)
-            .where(str_col == "abc")
-            .where(unicode_col == "abc")
-        )
+    # def test_where_clause_n_prefix(self):
+    #     dialect = mssql.dialect()
+    #     spec = MssqlEngineSpec
+    #     type_, _ = spec.get_sqla_column_type("VARCHAR(10)")
+    #     str_col = column("col", type_=type_)
+    #     type_, _ = spec.get_sqla_column_type("NTEXT")
+    #     unicode_col = column("unicode_col", type_=type_)
+    #     tbl = table("tbl")
+    #     sel = (
+    #         select([str_col, unicode_col])
+    #         .select_from(tbl)
+    #         .where(str_col == "abc")
+    #         .where(unicode_col == "abc")
+    #     )
 
-        query = str(
-            sel.compile(dialect=dialect, compile_kwargs={"literal_binds": 
True})
-        )
-        query_expected = (
-            "SELECT col, unicode_col \n"
-            "FROM tbl \n"
-            "WHERE col = 'abc' AND unicode_col = N'abc'"
-        )
-        self.assertEqual(query, query_expected)
+    #     query = str(
+    #         sel.compile(dialect=dialect, compile_kwargs={"literal_binds": 
True})
+    #     )
+    #     query_expected = (
+    #         "SELECT col, unicode_col \n"
+    #         "FROM tbl \n"
+    #         "WHERE col = 'abc' AND unicode_col = N'abc'"
+    #     )
+    #     self.assertEqual(query, query_expected)

Review comment:
       This is an important test - if it doesn't work we need to make sure it 
does

##########
File path: superset/db_engine_specs/base.py
##########
@@ -145,8 +146,87 @@ class BaseEngineSpec:  # pylint: 
disable=too-many-public-methods
     _date_trunc_functions: Dict[str, str] = {}
     _time_grain_expressions: Dict[Optional[str], str] = {}
     column_type_mappings: Tuple[
-        Tuple[Pattern[str], Union[TypeEngine, Callable[[Match[str]], 
TypeEngine]]], ...,
-    ] = ()
+        Tuple[
+            Pattern[str],
+            Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
+            GenericDataType,
+        ],
+        ...,
+    ] = (
+        (
+            re.compile(r"^smallint", re.IGNORECASE),
+            types.SmallInteger(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^integer", re.IGNORECASE),
+            types.Integer(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^bigint", re.IGNORECASE),
+            types.BigInteger(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^decimal", re.IGNORECASE),
+            types.Numeric(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^numeric", re.IGNORECASE),
+            types.Numeric(),
+            GenericDataType.NUMERIC,
+        ),
+        (re.compile(r"^real", re.IGNORECASE), types.REAL, 
GenericDataType.NUMERIC,),
+        (
+            re.compile(r"^smallserial", re.IGNORECASE),
+            types.SmallInteger(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^serial", re.IGNORECASE),
+            types.Integer(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^bigserial", re.IGNORECASE),
+            types.BigInteger(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE),
+            UnicodeText(),
+            utils.GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE),
+            String(),
+            utils.GenericDataType.STRING,
+        ),
+        (re.compile(r"^date", re.IGNORECASE), types.Date(), 
GenericDataType.TEMPORAL,),
+        (
+            re.compile(r"^timestamp", re.IGNORECASE),
+            types.TIMESTAMP(),
+            GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^timestamptz", re.IGNORECASE),
+            types.TIMESTAMP(timezone=True),
+            GenericDataType.TEMPORAL,
+        ),

Review comment:
       `^timestamptz` would never be caught as it's already matching 
`^timestamp` above. I suggest removing `timestamptz` support for now (timezones 
aren't really properly supported yet).

##########
File path: tests/databases/commands_tests.py
##########
@@ -259,7 +259,8 @@ def test_export_database_command(self, mock_g):
             "version": "1.0.0",
         }
         expected_metadata["columns"].sort(key=lambda x: x["column_name"])
-        assert metadata == expected_metadata
+        self.maxDiff = None
+        self.assertEquals(metadata, expected_metadata)

Review comment:
       Let's not change this
   ```suggestion
           assert metadata == expected_metadata
   ```

##########
File path: tests/db_engine_specs/mssql_tests.py
##########
@@ -134,9 +140,9 @@ def test_column_datatype_to_string(self):
             (DATE(), "DATE"),
             (VARCHAR(length=255), "VARCHAR(255)"),
             (VARCHAR(length=255, collation="utf8_general_ci"), "VARCHAR(255)"),
-            (NVARCHAR(length=128), "NVARCHAR(128)"),
+            # (NVARCHAR(length=128), "NVARCHAR(128)"),
             (TEXT(), "TEXT"),
-            (NTEXT(collation="utf8_general_ci"), "NTEXT"),
+            # (NTEXT(collation="utf8_general_ci"), "NTEXT"),

Review comment:
       This functionality+test can be removed later, but I believe we need to 
make a db migration to resize the type column in the metadata (not sure if it 
was already).




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [superset] villebro commented on a change in pull request #13294: feat(explore): Postgres datatype conversion

Reply via email to